Extended Druggable siRNA library

# Get data: reuse the cached library table when it exists, otherwise rebuild
# it from the primary-screen score file and cache the result as CSV.
if (file.exists(path_extdrg)) {

    extdrg <- read_csv(path_extdrg)

} else {

    # Split each treatment label at its last underscore into gene and siRNA,
    # drop the Scrambled / vx809 / empty entries (presumably controls), then
    # average the per-siRNA median scores and count the siRNAs of every gene.
    df <- read_csv(src_extdrg_prihits) %>%
          transmute(gene  = sub("^(.*)_(.*?)$", "\\1", treatment),
                    sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
                    score = median) %>%
          filter(!grepl("Scrambled", gene)) %>%
          filter(!grepl("vx809", gene)) %>%
          filter(!grepl("empty", gene)) %>%
          group_by(gene) %>%
          summarise(score  = mean(score),
                    nsirna = n())

    gene0   <- df$gene                        # Gene symbols as listed in the screen
    up      <- genesymbol_to_uniprot(gene0)   # Convert gene symbols to Uniprot IDs
    gene_na <- gene0[which(is.na(up))]        # Symbols for which no Uniprot ID was found

    # Manually add missing Uniprot IDs from `extraids`. The column is extracted
    # with `[[` so the replacement is always a plain character vector:
    # `extraids[rows, "uniprot"]` stays a one-column tibble when `extraids` is
    # a tibble, which would silently turn `up` into a list.
    up[is.na(up)] <- as.character(extraids[["uniprot"]][match(gene_na, extraids$symbol)])

    # All remaining identifiers are derived from the Uniprot IDs.
    gene    <- uniprot_to_genesymbol(up)
    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    extdrg <- tibble(symbol  = gene,
                     uniprot = up,
                     ensembl = ensg,
                     entrez  = id,
                     name    = gname,
                     score   = df$score,
                     nsirna  = df$nsirna)

    write.csv(extdrg, path_extdrg, row.names = FALSE)
}

# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
extdrgF <- extdrg %>%
           group_by(uniprot) %>%
           slice(1) %>%                # Remove duplicate uniprot IDs
           ungroup() %>%
           filter(!is.na(uniprot))     # Remove unknowns

# Library statistics: gene and siRNA counts before (extdrg) and after
# (extdrgF) the Uniprot-based filtering.
n_extdrg         <- nrow(extdrg)
n_extdrg_siRNAs  <- sum(extdrg$nsirna)
n_extdrgF        <- nrow(extdrgF)
n_extdrgF_siRNAs <- sum(extdrgF$nsirna)

All data from EXTDRG library:

After name updating:

CFTR traffic hits: Primary screen

# Get data: reuse the cached primary-hit table when it exists, otherwise
# rebuild it from the primary-screen score file and cache the result as CSV.
if (file.exists(path_extdrg_prihits)) {

    extdrg_prihits <- read_csv(path_extdrg_prihits)

} else {

    # Same parsing as for the full library, but only siRNAs with a median
    # score of at least 2 are kept before averaging per gene; genes are then
    # sorted by decreasing mean score.
    df <- read_csv(src_extdrg_prihits) %>%
          transmute(gene  = sub("^(.*)_(.*?)$", "\\1", treatment),
                    sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
                    score = median) %>%
          filter(!grepl("Scrambled", gene)) %>%
          filter(!grepl("vx809", gene)) %>%
          filter(!grepl("empty", gene)) %>%
          filter(score >= 2) %>%
          group_by(gene) %>%
          summarise(score  = mean(score),
                    nsirna = n()) %>%
          arrange(desc(score))

    gene0   <- df$gene                        # Gene symbols as listed in the screen
    up      <- genesymbol_to_uniprot(gene0)   # Convert gene symbols to Uniprot IDs
    gene_na <- gene0[which(is.na(up))]        # Symbols for which no Uniprot ID was found

    # Manually add missing Uniprot IDs from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "uniprot"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `up` into a list.
    up[is.na(up)] <- as.character(extraids[["uniprot"]][match(gene_na, extraids$symbol)])

    # All remaining identifiers are derived from the Uniprot IDs.
    gene    <- uniprot_to_genesymbol(up)
    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    extdrg_prihits <- tibble(symbol  = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez  = id,
                             name    = gname,
                             score   = df$score,
                             nsirna  = df$nsirna)

    write.csv(extdrg_prihits, path_extdrg_prihits, row.names = FALSE)
}


# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
extdrg_prihitsF <- extdrg_prihits %>%
                   group_by(uniprot) %>%
                   slice(1) %>%                # Remove duplicate uniprot IDs
                   ungroup() %>%
                   filter(!is.na(uniprot))     # Remove unknowns

# Library statistics: gene and siRNA counts before and after the
# Uniprot-based filtering.
n_extdrg_prihits         <- nrow(extdrg_prihits)
n_extdrg_prihits_siRNAs  <- sum(extdrg_prihits$nsirna)
n_extdrg_prihitsF        <- nrow(extdrg_prihitsF)
n_extdrg_prihitsF_siRNAs <- sum(extdrg_prihitsF$nsirna)

All data from EXTDRG primary hits:

After name updating:

CFTR traffic hits: Confirmation screen

# Get data: reuse the cached confirmed-hit table when it exists, otherwise
# rebuild it from the confirmation-screen file and cache the result as CSV.
if (file.exists(path_extdrg_cnfhits)) {

    extdrg_cnfhits <- read_csv(path_extdrg_cnfhits)

} else {

    # Split each treatment label at its last underscore into gene and siRNA and
    # use the F508del column as score. Rows are kept only when they are not
    # Scrambled / Neg entries, not flagged as Control, flagged by
    # hit_newscore227, and have a non-missing score of at least 1. Scores are
    # then averaged per (gene, Uniprot) pair and sorted in decreasing order.
    df <- read_csv(src_extdrg_cnfhits) %>%
          mutate(gene  = sub("^(.*)_(.*?)$", "\\1", treatment),
                 sirna = sub("^(.*)_(.*?)$", "\\2", treatment),
                 score = F508del) %>%
          filter(!grepl("^Scrambled", gene)) %>%
          filter(!grepl("^Neg", gene)) %>%
          filter(!Control) %>%
          filter(hit_newscore227) %>%
          filter(!is.na(score)) %>%
          select(gene, sirna, score, Uniprot) %>%
          filter(score >= 1) %>%
          group_by(gene, Uniprot) %>%
          # Drop the grouping explicitly: with two grouping keys summarise()
          # would otherwise return a tibble still grouped by `gene`.
          summarise(score   = mean(score),
                    nsirna  = n(),
                    .groups = "drop") %>%
          arrange(desc(score))

    # Here the Uniprot IDs come straight from the source file; every other
    # identifier is derived from them.
    up      <- df$Uniprot
    gene    <- uniprot_to_genesymbol(up)
    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    extdrg_cnfhits <- tibble(symbol  = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez  = id,
                             name    = gname,
                             score   = df$score,
                             nsirna  = df$nsirna)

    write.csv(extdrg_cnfhits, path_extdrg_cnfhits, row.names = FALSE)
}

# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
extdrg_cnfhitsF <- extdrg_cnfhits %>%
                   group_by(uniprot) %>%
                   slice(1) %>%                # Remove duplicate uniprot IDs
                   ungroup() %>%
                   filter(!is.na(uniprot))     # Remove unknowns

# Library statistics: gene and siRNA counts before and after the
# Uniprot-based filtering.
n_extdrg_cnfhits         <- nrow(extdrg_cnfhits)
n_extdrg_cnfhits_siRNAs  <- sum(extdrg_cnfhits$nsirna)
n_extdrg_cnfhitsF        <- nrow(extdrg_cnfhitsF)
n_extdrg_cnfhitsF_siRNAs <- sum(extdrg_cnfhitsF$nsirna)

All data from EXTDRG confirmed hits:

After name updating:

id symbol uniprot name
1 ACSBG2 Q5FVE4 Long-chain-fatty-acid–CoA ligase ACSBG2
2 ADAMTS6 Q9UKP5 A disintegrin and metalloproteinase with thrombospondin motifs 6
3 APOB P04114 Apolipoprotein B-100
4 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome
5 CCL27 Q9Y4X3 C-C motif chemokine 27
6 CITED2 Q99967 Cbp/p300-interacting transactivator 2
7 CLDN4 O14493 Claudin-4
8 COL5A1 P20908 Collagen alpha-1(V) chain
9 CPZ Q66K79 Carboxypeptidase Z
10 CREBBP Q92793 CREB-binding protein
11 CYSLTR2 Q9NS75 Cysteinyl leukotriene receptor 2
12 DCSTAMP Q9H295 Dendritic cell-specific transmembrane protein
13 EVI5L Q96CN4 EVI5-like protein
14 FOLR2 P14207 Folate receptor beta
15 GJB2 P29033 Gap junction beta-2 protein
16 GUCY2D Q02846 Retinal guanylyl cyclase 1
17 HELLS Q9NRZ9 Lymphoid-specific helicase
18 IL24 Q13007 Interleukin-24
19 ISL2 Q96A47 Insulin gene enhancer protein ISL-2
20 KIF17 Q9P2E2 Kinesin-like protein KIF17
21 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
22 LIN9 Q5TKA1 Protein lin-9 homolog
23 LRRK1 Q38SD2 Leucine-rich repeat serine/threonine-protein kinase 1
24 NOVA1 P51513 RNA-binding protein Nova-1
25 NTNG2 Q96CW9 Netrin-G2
26 OXSR1 O95747 Serine/threonine-protein kinase OSR1
27 PCDHB2 Q9Y5E7 Protocadherin beta-2
28 QRSL1 Q9H0R6 Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial
29 RECQL5 O94762 ATP-dependent DNA helicase Q5
30 SLC30A1 Q9Y6M5 Zinc transporter 1
31 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
32 TPK1 Q9H3S4 Thiamin pyrophosphokinase 1
33 VPS26A O75436 Vacuolar protein sorting-associated protein 26A
34 ZNF141 Q15928 Zinc finger protein 141
35 ZNF384 Q8TF68 Zinc finger protein 384

Pankow CFTR interactome (2015)

Nature (2015) 528, 510–516
deltaF508 CFTR interactome remodelling promotes rescue of cystic fibrosis
Sandra Pankow, Casimir Bamberger, Diego Calzolari, Salvador Martínez-Bartolomé, Mathieu Lavallée-Adam, William E. Balch & John R. Yates III
https://doi.org/10.1038/nature15729

Immunoprecipitation-based proteomic-profiling of isogenic HBE41o- WT- and F508del-CFTR cells. Besides wt- and F508del-CFTR interactors (direct and indirect) a core CFTR interactome is defined: proteins which interact with at least one CFTR variant.

The authors develop and apply a co-purifying protein identification technology (CoPIT), an IP-based proteomic-profiling approach of protein-protein interactions. They first determined the changes that occur between the WT- and F508del-CFTR interactome in HBE41o- and CFBE41o- bronchial epithelial cell lines. They also study interactome changes upon temperature shift (to 30 °C for 1h, 6h and 24h) and HDACi treatment in CFBE cells.

The most important outcomes of this study are the interactomes of WT-CFTR (Table S2) and F508del-CFTR (Table S3), which comprise 395 and 526 proteins, respectively, with an overlap of more than 85% or 638 proteins (Table S1).

# Core CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
if (file.exists(path_pankow2015_core)) {

    pankow2015_core <- read_csv(path_pankow2015_core)

} else {

    # Download to a temporary file that carries an .xlsx extension.
    tmp   <- tempfile(fileext = ".xlsx")
    download.file(url = src_pankow2015_core, destfile = tmp, mode = "wb")

    df    <- tibble(read.xlsx(tmp, sheet = 1))
    up    <- df$Uniprot_Entry
    gene  <- uniprot_to_genesymbol(up)        # Get gene symbols
    up_na <- up[which(is.na(gene))]           # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    pankow2015_core  <- tibble(symbol  = gene,
                               uniprot = up,
                               ensembl = ensg,
                               entrez  = id,
                               name    = gname)

    write.csv(pankow2015_core, path_pankow2015_core, row.names = FALSE)
}

# Library statistics (mapped genes)
# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
pankow2015_coreF   <- pankow2015_core %>%
                      group_by(uniprot) %>%
                      slice(1) %>%                # Remove duplicate uniprot IDs
                      ungroup() %>%
                      filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering.
n_pankow2015_core  <- nrow(pankow2015_core)
n_pankow2015_coreF <- nrow(pankow2015_coreF)



# wt-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
# Requires `pankow2015_core` to be available already.
if (file.exists(path_pankow2015_wt)) {

    pankow2015_wt <- read_csv(path_pankow2015_wt)

} else {

    # Download to a temporary file that carries an .xlsx extension.
    tmp   <- tempfile(fileext = ".xlsx")
    download.file(url = src_pankow2015_wt, destfile = tmp, mode = "wb")

    # Gene symbols of wt-CFTR interactors (sheet 2), without empty symbols
    # and without CFTR itself.
    df <-    tmp %>%
             read.xlsx(sheet = 2) %>%
             dplyr::select(GeneSymbol, Protein1_Genemania_ID) %>%
             filter(!is.na(GeneSymbol)) %>%
             filter(GeneSymbol != "CFTR")

    # Uniprot IDs are looked up in the core interactome by gene symbol, so
    # symbols that are absent from `pankow2015_core` are not carried over.
    up    <- pankow2015_core %>%
             filter(symbol %in% df$GeneSymbol) %>%
             pull(uniprot)
    gene  <- uniprot_to_genesymbol(up)        # Get gene symbols
    up_na <- up[which(is.na(gene))]           # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    id    <- uniprot_to_geneid(up)
    ensg  <- uniprot_to_ensembl(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    pankow2015_wt  <- tibble(symbol  = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez  = id,
                             name    = gname)

    write.csv(pankow2015_wt, path_pankow2015_wt, row.names = FALSE)
}

# Library statistics (mapped genes)
# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
pankow2015_wtF   <- pankow2015_wt %>%
                    group_by(uniprot) %>%
                    slice(1) %>%                # Remove duplicate uniprot IDs
                    ungroup() %>%
                    filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering.
n_pankow2015_wt  <- nrow(pankow2015_wt)
n_pankow2015_wtF <- nrow(pankow2015_wtF)











# F508del-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
# Requires `pankow2015_core` to be available already.
if (file.exists(path_pankow2015_df)) {

    pankow2015_df <- read_csv(path_pankow2015_df)

} else {

    # Download to a temporary file that carries an .xlsx extension.
    tmp   <- tempfile(fileext = ".xlsx")
    download.file(url = src_pankow2015_df, destfile = tmp, mode = "wb")

    # Gene symbols of F508del-CFTR interactors (sheet 3), without empty
    # symbols and without CFTR itself.
    df <-    tmp %>%
             read.xlsx(sheet = 3) %>%
             dplyr::select(GeneSymbol, Protein1_Genemania_ID) %>%
             filter(!is.na(GeneSymbol)) %>%
             filter(GeneSymbol != "CFTR")

    # Uniprot IDs are looked up in the core interactome by gene symbol, so
    # symbols that are absent from `pankow2015_core` are not carried over.
    up    <- pankow2015_core %>%
             filter(symbol %in% df$GeneSymbol) %>%
             pull(uniprot)
    gene  <- uniprot_to_genesymbol(up)        # Get gene symbols
    up_na <- up[which(is.na(gene))]           # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    id    <- uniprot_to_geneid(up)
    ensg  <- uniprot_to_ensembl(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    pankow2015_df  <- tibble(symbol  = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez  = id,
                             name    = gname)

    write.csv(pankow2015_df, path_pankow2015_df, row.names = FALSE)
}

# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
pankow2015_dfF   <- pankow2015_df %>%
                    group_by(uniprot) %>%
                    slice(1) %>%                # Remove duplicate uniprot IDs
                    ungroup() %>%
                    filter(!is.na(uniprot))     # Remove unknowns

# Library statistics (mapped genes): row counts before and after filtering.
n_pankow2015_df  <- nrow(pankow2015_df)
n_pankow2015_dfF <- nrow(pankow2015_dfF)

Core CFTR interactome:
* All data: 638 genes.
* After name updating: 637 genes.

wt-CFTR interactome:
* All data: 364 genes.
* After name updating: 363 genes.

F508del-CFTR interactome:
* All data: 481 genes.
* After name updating: 480 genes.

Primary traffic hits & Pankow2015 (core)

id symbol uniprot name
1 BAG3 O95817 BAG family molecular chaperone regulator 3
2 CCT5 P48643 T-complex protein 1 subunit epsilon
3 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
4 MYH14 Q7Z406 Myosin-14
5 NCBP2 P52298 Nuclear cap-binding protein subunit 2
6 PABPC3 Q9H361 Polyadenylate-binding protein 3
7 PSMA6 P60900 Proteasome subunit alpha type-6
8 PSMB1 P20618 Proteasome subunit beta type-1
9 UQCRC1 P31930 Cytochrome b-c1 complex subunit 1, mitochondrial

Primary traffic hits & Pankow2015 (F508del)

id symbol uniprot name
1 BAG3 O95817 BAG family molecular chaperone regulator 3
2 CCT5 P48643 T-complex protein 1 subunit epsilon
3 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
4 MYH14 Q7Z406 Myosin-14
5 NCBP2 P52298 Nuclear cap-binding protein subunit 2
6 PABPC3 Q9H361 Polyadenylate-binding protein 3
7 PSMA6 P60900 Proteasome subunit alpha type-6
8 PSMB1 P20618 Proteasome subunit beta type-1
9 UQCRC1 P31930 Cytochrome b-c1 complex subunit 1, mitochondrial

Canato CFTR interactome (2018)

Cell Mol Life Sci (2018) 75(24):4495-4509
Proteomic interaction profiling reveals KIFC1 as a factor involved in early targeting of F508del-CFTR to degradation
Sara Canato, João D Santos, Ana S Carvalho, Kerman Aloria, Margarida D Amaral, Rune Matthiesen, André O Falcao, Carlos M Farinha
https://doi.org/10.1007/s00018-018-2896-7

Total interactome identified for F508del-CFTR and F508del-4RK-CFTR. Protein complexes were isolated by pulling-down CFTR from CFBE cells expressing these two variants.

The authors used a proteomic profiling approach coupled to global bioinformatic analysis to investigate the differential protein-protein interactions (PPIs) of F508del-CFTR vs F508del-4RK-CFTR (a variant which allows F508del-CFTR to escape the ERQC and consequently reach the plasma membrane (PM), although without a significant correction in its folding). This approach resulted in the mapping of the comparative interactome of F508del-CFTR and F508del-4RK-CFTR to identify biological pathways dictating ER retention and degradation of F508del-CFTR and novel putative therapeutic targets in CF.

An important outcome of this study is an F508del-CFTR interactome from CFBE cells expressing this variant, which can be found in Table S1 and comprises 828 proteins. An interactome for F508del-4RK-CFTR is also provided. Furthermore, they compare their F508del-CFTR interactome with the “core CFTR interactome” found by Pankow and collaborators and find 217 common proteins, corresponding to 26% of their complete list of interactors. This suggests that the work by Canato et al. contains additional interactors which might be of interest.

# F508del-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
if (file.exists(path_canato2018_df)) {

    canato2018_df <- read_csv(path_canato2018_df)

} else {

    tmp    <- tempfile()
    download.file(url = src_canato2018, destfile = tmp, mode = "wb")

    # Read columns 4-6 and 10, convert the "DF*" columns to numeric and keep
    # the rows whose "DF*" values sum to more than zero. `up0` and `gn0` are
    # the 2nd and 3rd pipe-separated fields of `Head` (presumably a header of
    # the form "db|ACCESSION|NAME_SPECIES" -- confirm against the source file).
    df      <- tmp %>%
                  read.xlsx(cols = c(4:6, 10)) %>%
                  tibble() %>%
                  mutate(across(starts_with("DF"), as.numeric)) %>%
                  mutate(up0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\1", Head),
                         gn0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\2", Head),
                         sum = rowSums(across(starts_with("DF")))) %>%
                  filter(sum > 0)

    up      <- df$up0
    gene    <- uniprot_to_genesymbol(up)
    up_na   <- up[which(is.na(gene))]         # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    canato2018_df <- tibble(symbol  = gene,
                            uniprot = up,
                            ensembl = ensg,
                            entrez  = id,
                            name    = gname)

    write.csv(canato2018_df, path_canato2018_df, row.names = FALSE)
}

# Library statistics (mapped genes)
# Uniprot IDs of this data set that `extraids` marks as not in the database.
bad_up         <- extraids %>%
                  filter(!is.na(uniprot) & !in_database & uniprot %in% canato2018_df$uniprot) %>%
                  pull(uniprot)                       # Deprecated Uniprot IDs

# Filtered table: one row per Uniprot ID, without missing or deprecated IDs.
canato2018_dfF <- canato2018_df %>%
                  group_by(uniprot) %>%
                  slice(1) %>%                # Remove duplicate uniprot IDs
                  ungroup() %>%
                  filter(!is.na(uniprot)) %>%    # Remove unknowns
                  filter(!uniprot %in% bad_up)   # Uniprots not in the database

# Row counts before and after filtering.
n_canato2018_df  <- nrow(canato2018_df)
n_canato2018_dfF <- nrow(canato2018_dfF)



# 4RK-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
if (file.exists(path_canato2018_4RK)) {

    canato2018_4RK <- read_csv(path_canato2018_4RK)

} else {

    tmp    <- tempfile()
    download.file(url = src_canato2018, destfile = tmp, mode = "wb")

    # Read columns 1-3 and 10, convert the "4RK*" columns to numeric and keep
    # the rows whose "4RK*" values sum to more than zero. `up0` and `gn0` are
    # the 2nd and 3rd pipe-separated fields of `Head` (presumably a header of
    # the form "db|ACCESSION|NAME_SPECIES" -- confirm against the source file).
    df      <- tmp %>%
                  read.xlsx(cols = c(1:3, 10)) %>%
                  tibble() %>%
                  mutate(across(starts_with("4RK"), as.numeric)) %>%
                  mutate(up0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\1", Head),
                         gn0 = sub("^.*?\\|(.*?)\\|(.*?)_.*$", "\\2", Head),
                         sum = rowSums(across(starts_with("4RK")))) %>%
                  filter(sum > 0)

    up      <- df$up0
    gene    <- uniprot_to_genesymbol(up)
    up_na   <- up[which(is.na(gene))]         # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    canato2018_4RK <- tibble(symbol  = gene,
                             uniprot = up,
                             ensembl = ensg,
                             entrez  = id,
                             name    = gname)

    write.csv(canato2018_4RK, path_canato2018_4RK, row.names = FALSE)
}

# Library statistics (mapped genes)
# Uniprot IDs of this data set that `extraids` marks as not in the database.
bad_up          <- extraids %>%
                  filter(!is.na(uniprot) & !in_database & uniprot %in% canato2018_4RK$uniprot) %>%
                  pull(uniprot)                       # Deprecated Uniprot IDs

# Filtered table: one row per Uniprot ID, without missing or deprecated IDs.
canato2018_4RKF <- canato2018_4RK %>%
                   group_by(uniprot) %>%
                   slice(1) %>%                # Remove duplicate uniprot IDs
                   ungroup() %>%
                   filter(!is.na(uniprot)) %>%    # Remove unknowns
                   filter(!uniprot %in% bad_up)   # Uniprots not in the database

# Row counts before and after filtering.
n_canato2018_4RK  <- nrow(canato2018_4RK)
n_canato2018_4RKF <- nrow(canato2018_4RKF)

F508del-CFTR interactome:
* All data: 828 genes.
* After name updating: 808 genes.

4RK-F508del-CFTR interactome:
* All data: 793 genes.
* After name updating: 774 genes.

Primary traffic hits & Canato2018

id symbol uniprot name
1 DNAJC8 O75937 DnaJ homolog subfamily C member 8
2 NCBP2 P52298 Nuclear cap-binding protein subunit 2

Santos CFTR interactome (2019)

Cells (2019) 8(4):353
Folding Status Is Determinant over Traffic-Competence in Defining CFTR Interactors in the Endoplasmic Reticulum
João D. Santos, Sara Canato, Ana S. Carvalho, Hugo M. Botelho, Kerman Aloria, Margarida D. Amaral, Rune Matthiesen, Andre O. Falcao, Carlos M. Farinha
https://doi.org/10.3390/cells8040353

Pulled-down proteins that interact with either wt- or DD/AA-CFTR in CFBE cells following a general strategy consisting of: (i) cell lysis and immunoprecipitation (IP) of CFTR and interacting proteins; (ii) identification of proteins by LC-MS/MS; and (iii) analysis of the CFTR interactome.

The authors aimed to explore the CFTR ERQC trafficking checkpoints by performing global comparisons of the interactions that regulate CFTR exit from the ER to identify protein factors involved in these processes. They used protein interaction profiling and global bioinformatics analyses and identified proteins that interact specifically with peptides harbouring the normal (wt) and 4RK AFT sequences, as well as proteins that interact with CFTR with/without abrogation of the DAD code (whose presence totally blocks CFTR exit from the ER). They also cross their datasets with the ones from Canato et al 2018.

An important outcome of this study is a wt-CFTR interactome from CFBE cells, which can be found in Dataset S1B and comprises 945 proteins. An interactome for DD/AA-CFTR is also provided.

# wt-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download and
# unpack the supplementary archive, map the identifiers and cache as CSV.
if (file.exists(path_santos2019_wt)) {

    santos2019_wt <- read_csv(path_santos2019_wt)

} else {

    # Download the zip archive and extract it next to the temporary file.
    tmp    <- tempfile()
    tmpdir <- file.path(dirname(tmp), "santos2019")
    download.file(url = src_santos2019, destfile = tmp, mode = "wb")
    unzip(tmp, exdir = tmpdir)

    # Gene symbols: first column of sheet 2, read without a header row.
    xlsx    <- file.path(tmpdir, "Supplementary materials", "Datasets_S1 to S7.xlsx")
    gene0   <- read.xlsx(xlsx, sheet = 2, colNames = FALSE)[[1]]
    up      <- genesymbol_to_uniprot(gene0)
    gene_na <- gene0[which(is.na(up))]        # Symbols for which no Uniprot ID was found

    # Add missing Uniprot IDs from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "uniprot"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `up` into a list.
    up[is.na(up)] <- as.character(extraids[["uniprot"]][match(gene_na, extraids$symbol)])

    # All remaining identifiers are derived from the Uniprot IDs.
    gene    <- uniprot_to_genesymbol(up)
    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    santos2019_wt <- tibble(symbol  = gene,
                            uniprot = up,
                            ensembl = ensg,
                            entrez  = id,
                            name    = gname)

    write.csv(santos2019_wt, path_santos2019_wt, row.names = FALSE)
}

# Library statistics (mapped genes)
# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
santos2019_wtF <- santos2019_wt %>%
                  group_by(uniprot) %>%
                  slice(1) %>%                # Remove duplicate uniprot IDs
                  ungroup() %>%
                  filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering.
n_santos2019_wt  <- nrow(santos2019_wt)
n_santos2019_wtF <- nrow(santos2019_wtF)



# DD/AA-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download and
# unpack the supplementary archive, map the identifiers and cache as CSV.
if (file.exists(path_santos2019_DDAA)) {

    santos2019_DDAA <- read_csv(path_santos2019_DDAA)

} else {

    # Download the zip archive and extract it next to the temporary file.
    tmp    <- tempfile()
    tmpdir <- file.path(dirname(tmp), "santos2019")
    download.file(url = src_santos2019, destfile = tmp, mode = "wb")
    unzip(tmp, exdir = tmpdir)

    # Gene symbols: first column of sheet 1, read without a header row.
    xlsx    <- file.path(tmpdir, "Supplementary materials", "Datasets_S1 to S7.xlsx")
    gene0   <- read.xlsx(xlsx, sheet = 1, colNames = FALSE)[[1]]
    up      <- genesymbol_to_uniprot(gene0)
    gene_na <- gene0[which(is.na(up))]        # Symbols for which no Uniprot ID was found

    # Add missing Uniprot IDs from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "uniprot"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `up` into a list.
    up[is.na(up)] <- as.character(extraids[["uniprot"]][match(gene_na, extraids$symbol)])

    # All remaining identifiers are derived from the Uniprot IDs.
    gene    <- uniprot_to_genesymbol(up)
    ensg    <- uniprot_to_ensembl(up)
    id      <- uniprot_to_geneid(up)
    gname   <- uniprot_to_genename(up, parallelize = TRUE)

    santos2019_DDAA <- tibble(symbol  = gene,
                              uniprot = up,
                              ensembl = ensg,
                              entrez  = id,
                              name    = gname)

    write.csv(santos2019_DDAA, path_santos2019_DDAA, row.names = FALSE)
}

# Library statistics (mapped genes)
# Filtered table: one row per Uniprot ID, rows without a Uniprot ID removed.
santos2019_DDAAF <- santos2019_DDAA %>%
                    group_by(uniprot) %>%
                    slice(1) %>%                # Remove duplicate uniprot IDs
                    ungroup() %>%
                    filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering.
n_santos2019_DDAA  <- nrow(santos2019_DDAA)
n_santos2019_DDAAF <- nrow(santos2019_DDAAF)

wt-CFTR interactome:
* All data: 945 genes.
* After name updating: 936 genes.

DD/AA-CFTR interactome:
* All data: 979 genes.
* After name updating: 970 genes.

Primary traffic hits & Santos2019

id symbol uniprot name
1 CCT5 P48643 T-complex protein 1 subunit epsilon
2 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
3 MYH14 Q7Z406 Myosin-14
4 PABPC3 Q9H361 Polyadenylate-binding protein 3
5 PSMA6 P60900 Proteasome subunit alpha type-6

Hutt CFTR interactome (2018)

J Mol Biol (2018) 430: 2951-2973
A Proteomic Variant Approach (ProVarA) for Personalized Medicine of Inherited and Somatic Disease
Darren M. Hutt, Salvatore Loguercio, Alexandre Rosa Campos and William E. Balch
https://doi.org/10.1016/j.jmb.2018.06.017

A cohort of proteins that are differentially interacting with a broad spectrum of CFTR variants (including F508del) in CFBE cells, identified through a co-purifying protein identification technology (CoPIT) methodology [23], an immunoprecipitation-based proteomic approach: ProVarA.

The authors create a proteomic methodology called proteomic variant approach (ProVarA), which uses the CoPIT methodology in order to capture critical protein interaction profiles (PIPs) to understand the onset and progression of variant-specific disease. They apply this method to 1) interrogate the impact of different CF-causing mutations on the functional interactions of CFTR, as well as 2) assess the impact of therapeutics on the profiles of the disease-causing variants. Besides mapping the PIPs, the authors are also able to pinpoint the binding affinities of the proteins within the PIPs, which is one of the greatest strengths of this methodology. This work was performed on CFBE41o- transiently transfected with CFTR variants.

Important outcomes of this study are the PIPs for WT- and F508del-CFTR in Tables S1 and S2, respectively. Other outputs include PIPs for other CFTR variants (i.e. G85E, R560T, N1303K and G551D) as well as G551D-CFTR treated with VX-770 and F508del-CFTR treated with VX-809. Importantly, the PIPs for each condition were selected if they exhibited a statistically significant difference in recovery relative to the control CFTR immunoprecipitation in GFP transduced CFBE41o- null cells.

# wt-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
if (file.exists(path_hutt2018_wt)) {

    hutt2018_wt <- read_csv(path_hutt2018_wt)

} else {

    tmp    <- tempfile()
    download.file(url = src_hutt2018_wt, destfile = tmp, mode = "wb")

    # Sheet 2: coerce the 4th column to numeric and keep only the rows where
    # that conversion yields a value.
    df    <- tibble(read.xlsx(tmp, sheet = 2)) %>%
             mutate(intensity = as.numeric(.[[4]])) %>%
             filter(!is.na(intensity))

    # `Proteins` may hold several ";"-separated Uniprot IDs per row; every ID
    # becomes its own entry.
    up0   <- df$Proteins
    up    <- unlist(strsplit(up0, ";"))
    gene  <- uniprot_to_genesymbol(up)
    up_na <- up[which(is.na(gene))]           # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    hutt2018_wt <- tibble(symbol  = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez  = id,
                          name    = gname)

    write.csv(hutt2018_wt, path_hutt2018_wt, row.names = FALSE)
}

# Library statistics (mapped genes)
# Uniprot IDs of this data set that `extraids` marks as not in the database.
bad_up       <- extraids %>%
                filter(!is.na(uniprot) & !in_database & uniprot %in% hutt2018_wt$uniprot) %>%
                pull(uniprot)                       # Deprecated Uniprot IDs

# Filtered table: one row per Uniprot ID, without missing or deprecated IDs.
hutt2018_wtF <- hutt2018_wt %>%
                group_by(uniprot) %>%
                slice(1) %>%                # Remove duplicate uniprot IDs
                ungroup() %>%
                filter(!is.na(uniprot)) %>%     # Remove unknowns
                filter(!uniprot %in% bad_up)    # Uniprots not in the database

# Row counts before and after filtering.
n_hutt2018_wt  <- nrow(hutt2018_wt)
n_hutt2018_wtF <- nrow(hutt2018_wtF)



# F508del-CFTR interactome
# Get data: reuse the cached table when it exists, otherwise download the
# source workbook, map the identifiers and cache the result as CSV.
if (file.exists(path_hutt2018_df)) {

    hutt2018_df <- read_csv(path_hutt2018_df)

} else {

    tmp    <- tempfile()
    download.file(url = src_hutt2018_df, destfile = tmp, mode = "wb")

    # Sheet 2: coerce the 4th column to numeric and keep only the rows where
    # that conversion yields a value.
    df    <- tibble(read.xlsx(tmp, sheet = 2)) %>%
             mutate(intensity = as.numeric(.[[4]])) %>%
             filter(!is.na(intensity))

    # `Proteins` may hold several ";"-separated Uniprot IDs per row; every ID
    # becomes its own entry.
    up0   <- df$Proteins
    up    <- unlist(strsplit(up0, ";"))
    gene  <- uniprot_to_genesymbol(up)
    up_na <- up[which(is.na(gene))]           # Uniprot IDs for which no gene symbol was found

    # Add missing gene symbols from `extraids`. `[[` guarantees a plain
    # character vector; `extraids[rows, "symbol"]` would remain a one-column
    # tibble if `extraids` is a tibble and turn `gene` into a list.
    gene[is.na(gene)] <- as.character(extraids[["symbol"]][match(up_na, extraids$uniprot)])

    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    hutt2018_df <- tibble(symbol  = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez  = id,
                          name    = gname)

    write.csv(hutt2018_df, path_hutt2018_df, row.names = FALSE)
}

# Library statistics (mapped genes)
# Uniprot IDs of this data set that `extraids` marks as not in the database.
bad_up       <- extraids %>%
                filter(!is.na(uniprot) & !in_database & uniprot %in% hutt2018_df$uniprot) %>%
                pull(uniprot)                       # Deprecated Uniprot IDs

# Filtered table: one row per Uniprot ID, without missing or deprecated IDs.
hutt2018_dfF <- hutt2018_df %>%
                group_by(uniprot) %>%
                slice(1) %>%                # Remove duplicate uniprot IDs
                ungroup() %>%
                filter(!is.na(uniprot)) %>%     # Remove unknowns
                filter(!uniprot %in% bad_up)    # Uniprots not in the database

# Row counts before and after filtering.
n_hutt2018_df  <- nrow(hutt2018_df)
n_hutt2018_dfF <- nrow(hutt2018_dfF)

wt-CFTR interactome:
* All data: 731 genes.
* After name updating: 656 genes.

F508del-CFTR interactome:
* All data: 621 genes.
* After name updating: 558 genes.

Primary traffic hits & Hutt2018

id symbol uniprot name
1 CCT5 P48643 T-complex protein 1 subunit epsilon
2 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
3 MYH14 Q7Z406 Myosin-14
4 PABPC3 Q9H361 Polyadenylate-binding protein 3
5 PSMA6 P60900 Proteasome subunit alpha type-6

Rauniyar CFTR proteome (2014)

J Proteome Res (2014) 13(11): 4668-4675
Quantitative Proteomic Profiling Reveals Differentially Regulated Proteins in Cystic Fibrosis Cells
Navin Rauniyar, Vijay Gupta, William E. Balch, and John R. Yates, III
https://doi.org/10.1021/pr500370g

Differential protein expression between a cell line model of cystic fibrosis (i.e., bronchial epithelial cells expressing F508del-CFTR (CFBE cells)) and wild-type CFTR (HBE cells).

The authors use LC-MS/MS and the MudPIT method to investigate the differential protein expression between a cell line model of cystic fibrosis (i.e., bronchial epithelial cells expressing F508del-CFTR, CFBE cells) and wild-type CFTR (HBE cells). This is a global proteomics approach to understand how mutant CFTR may lead to perturbed molecular pathways.

The most important outcome of this study is the raw data presented in Table S1, which contains the list of identified proteins from the HBE (sheet 1) and CFBE (sheet 2), as well as a common proteins list (sheet 3). Although the authors include all the data they obtained, they advise (correctly) to only consider the proteins that were present in all three biological replicates, which is indicated by the column “Occurrence” (occurrence = 3). These are 3586 proteins for the HBE cells, and 3566 for the CFBE cells. Uniprot ID information is also present in this table.

To be done.

Almaça ENaC interactome (2013)

Cell (2013) 154(6):1390-400
High-content siRNA screen reveals global ENaC regulators and potential cystic fibrosis therapy targets
Joana Almaça, Diana Faria, Marisa Sousa, Inna Uliyakina, Christian Conrad, Lalida Sirianant, Luka A Clarke, José Paulo Martins, Miguel Santos, Jean-Karim Heriché, Wolfgang Huber, Rainer Schreiber, Rainer Pepperkok, Karl Kunzelmann, Margarida D Amaral
https://doi.org/10.1016/j.cell.2013.08.045

# Validated ENaC hits
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_almaca2013)) {

    almaca2013 <- read_csv(path_almaca2013)

} else {

    tmp <- tempfile()
    download.file(url = src_almaca2013, destfile = tmp, mode = "wb")

    # Sheet 2, read from row 5 onwards; rows without an Ensembl ID are dropped
    df    <- tibble(read.xlsx(tmp, sheet = 2, startRow = 5)) %>%
             filter(!is.na(Ensemble.ID))
    ensg0 <- df$Ensemble.ID
    up    <- ensembl_to_uniprot(ensg0)

    # Ensembl IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(ensg0[no_up], extraids$ensembl)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    almaca2013 <- tibble(symbol  = gene,
                         uniprot = up,
                         ensembl = ensg,
                         entrez  = id,
                         name    = gname)

    # Cache for subsequent runs
    write.csv(almaca2013, path_almaca2013, row.names = FALSE)
}

# Library statistics (mapped genes)
almaca2013F <- almaca2013 %>%
               group_by(uniprot) %>%
               slice(1) %>%                # Remove duplicate uniprot IDs
               ungroup() %>%
               filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_almaca2013  <- nrow(almaca2013)
n_almaca2013F <- nrow(almaca2013F)

ENaC activating genes:
* All data: 166 genes.
* After name updating: 165 genes.

Primary traffic hits & Almaça2013

id symbol uniprot name
1 FRK P42685 Tyrosine-protein kinase FRK
2 GRK5 P34947 G protein-coupled receptor kinase 5
3 ITPK1 Q13572 Inositol-tetrakisphosphate 1-kinase
4 PPP1R1B Q9UD71 Protein phosphatase 1 regulatory subunit 1B

Tomati CFTR regulome (2018)

J Biol Chem (2018) 293(4):1203-1217

High-throughput screening identifies FAU protein as a regulator of mutant cystic fibrosis transmembrane conductance regulator channel
Valeria Tomati, Emanuela Pesce, Emanuela Caci, Elvira Sondo, Paolo Scudieri, Monica Marini, Felice Amato, Giuseppe Castaldo, Roberto Ravazzolo, Luis J.V.Galietta, Nicoletta Pedemonte
https://doi.org/10.1074/jbc.M117.816595

Genes whose silencing significantly rescued F508del-CFTR activity, as indicated by enhanced anion transport through the plasma membrane.

The authors performed the first genome-wide screening of a druggable-genome library of siRNA molecules in bronchial epithelial cells to find proteins whose suppression results in a significant functional and biochemical rescue of F508del-CFTR. They were able to find 37 validated targets, which are displayed in the paper itself in Table 1. They then dissect some of them, particularly FAU, regarding their MoA.

# Get data: reuse the cached table when it exists; otherwise build it from the
# values transcribed below and cache the result.
if (file.exists(path_tomati2018)) {

    tomati2018 <- read_csv(path_tomati2018)

} else {

    # Transcribed data: gene symbol, `mean` value (stored as `score` below) and
    # Uniprot accession(s) for each of the 37 entries
    df <- tibble(gene = c("ALOX5AP","ANXA11","CDK6","CHD4","CNTN1","CSNK2A1","FAU","FRAG1","GALK1",
                          "GJA5","GJB4","GPR149","KCTD8","LRRC59","MLLT6","NR2E3","P2RX1","PHF12",
                          "PLXNA1","PLXND1","PRPS1","PTCH1","RARA","RBBP6","RNF207","RORB","SEC22B",
                          "SGK223","SLC25A1","SNRNP25","TLR7","TRIM24","UBA2","UBA52","UBE2C","UBE2I","UBXN6"),
                 mean = c(1.302,1.307,1.342,1.787,1.309,1.395,1.621,1.382,1.304,1.346,1.293,1.445,1.4,1.37,
                          1.501,1.419,1.291,1.49,1.484,1.295,1.322,1.425,1.31,1.472,1.449,1.328,1.365,1.411,
                          1.303,1.519,1.376,1.739,1.653,1.922,1.357,1.639,1.366),
                 uniprot = c("P20292", "P50995", "Q00534", "Q14839", "Q12860", "P68400", "P62861/P35544",
                             "Q9UHJ9", "P51570", "P36382", "Q9NTQ9", "Q86SP6", "Q6ZWB6", "Q96AG4",
                             "P55198", "Q9Y5X4", "P51575", "Q96QT6", "Q9UIW2", "Q9Y4D7", "P60891",
                             "Q13635", "P10276", "Q7Z6E9", "Q6ZRF8", "Q92753", "O75396", "Q86YV5",
                             "P53007", "Q9BV90", "Q9NYK1", "O15164", "Q9UBT2", "P62987", "O00762",
                             "P63279", "Q9BZV1"))

    # Where two accessions are given as "A/B", keep only the first one
    up   <- sub("(.*)/(.*)", "\\1", df$uniprot)
    gene <- uniprot_to_genesymbol(up)

    # Uniprot IDs for which a gene symbol could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$symbol[...]`) so
    # that `gene` stays a plain vector whether `extraids` is a data.frame or a
    # tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_gene       <- is.na(gene)
    gene[no_gene] <- extraids$symbol[match(up[no_gene], extraids$uniprot)]

    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)   # TRUE, not the reassignable `T`

    tomati2018 <- tibble(symbol  = gene,
                         uniprot = up,
                         ensembl = ensg,
                         entrez  = id,
                         name    = gname,
                         score   = df$mean)

    # Cache for subsequent runs
    write.csv(tomati2018, path_tomati2018, row.names = FALSE)
}

# Library statistics (mapped genes)
tomati2018F <- tomati2018 %>%
               group_by(uniprot) %>%
               slice(1) %>%                # Remove duplicate uniprot IDs
               ungroup() %>%
               filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_tomati2018  <- nrow(tomati2018)
n_tomati2018F <- nrow(tomati2018F)

Primary traffic hits & Tomati2018

id symbol uniprot name
1 UBE2I P63279 SUMO-conjugating enzyme UBC9

Simpson secretome (2012)

Nature Cell Biology (2012) 14, 764–774
Genome-wide RNAi screening identifies human proteins with a regulatory function in the early secretory pathway
Jeremy C. Simpson, Brigitte Joggerst, Vibor Laketa, Fatima Verissimo, Cihan Cetin, Holger Erfle, Mariana G. Bexiga, Vasanth R. Singan, Jean-Karim Hériché, Beate Neumann, Alvaro Mateos, Jonathon Blake, Stephanie Bechtel, Vladimir Benes, Stefan Wiemann, Jan Ellenberg & Rainer Pepperkok
https://doi.org/10.1038/ncb2510

Proteins influencing secretion of tsO45G in HeLa cells.

# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_simpson2012)) {

    simpson2012 <- read_csv(path_simpson2012)

} else {

    tmp <- tempfile()
    download.file(url = src_simpson2012, destfile = tmp, mode = "wb")

    df    <- tibble(read_excel(tmp))
    ensg0 <- df[["ENSEMBL ID"]]
    up    <- ensembl_to_uniprot(ensg0)

    # Ensembl IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(ensg0[no_up], extraids$ensembl)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)                     # Get gene symbols
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    simpson2012 <- tibble(symbol  = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez  = id,
                          name    = gname)

    # Cache for subsequent runs
    write.csv(simpson2012, path_simpson2012, row.names = FALSE)
}

# Library statistics (mapped genes)
simpson2012F <- simpson2012 %>%
                group_by(uniprot) %>%
                slice(1) %>%                # Remove duplicate uniprot IDs
                ungroup() %>%
                filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_simpson2012  <- nrow(simpson2012)
n_simpson2012F <- nrow(simpson2012F)

Traffic regulating genes:
* All data: 554 genes.
* After name updating: 554 genes.

Primary traffic hits & Simpson2012

id symbol uniprot name
1 CNKSR1 Q969H4 Connector enhancer of kinase suppressor of ras 1
2 CNR2 P34972 Cannabinoid receptor 2
3 DGUOK Q16854 Deoxyguanosine kinase, mitochondrial
4 EMILIN2 Q9BXX0 EMILIN-2
5 FER P16591 Tyrosine-protein kinase Fer
6 GOSR2 O14653 Golgi SNAP receptor complex member 2
7 OXSR1 O95747 Serine/threonine-protein kinase OSR1
8 ZBTB25 P24278 Zinc finger and BTB domain-containing protein 25

Confirmed traffic hits & Simpson2012

id symbol uniprot name
1 OXSR1 O95747 Serine/threonine-protein kinase OSR1

Wang interactome (2006)

Cell (2006) 127(4):803-815
Hsp90 Cochaperone Aha1 Downregulation Rescues Misfolding of CFTR in Cystic Fibrosis
Xiaodong Wang, John Venable, Paul LaPointe, Darren M. Hutt, Atanas V. Koulov, Judith Coppinger, Cemal Gurkan, Wendy Kellner, Jeanne Matteson, Helen Plutner, John R. Riordan, Jeffery W. Kelly, John R. Yates III, William E. Balch
https://doi.org/10.1016/j.cell.2006.09.043

CFTR-specific proteomes immunoprecipitated from matched BHK cell lines heterologously expressing wt- or F508del-CFTR.

The authors apply mass spectrometry through the use of multidimensional protein identification technology (MudPIT) to begin to define the global protein interactions of CFTR – the CFTR interactome. The work is mostly focused on the ER folding and export proteome, but other wt-CFTR interactors were also found, and some are even discussed in the supplemental discussion.

Experiments were conducted on 4 different cell lines: BHK (kidney, hamster), Calu-3 (lung, human), HT29 (intestine, human), and T84 (intestine, human). Interestingly, all the human cell lines are cancer cell lines. It might make sense to consider either the full interactome (167 proteins), Table S1, or simply the Calu-3 interactome, Table S5 Protein (50 proteins). Experiments were also conducted on BHK F508del-CFTR cells, but these should probably not be considered.

# wt-CFTR Interactome in BHK cells
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_wang2006_wt)) {

    wang2006_wt <- read_csv(path_wang2006_wt)

} else {

    tmp <- tempfile()
    download.file(url = src_wang2006, destfile = tmp, mode = "wb")

    # Sheet 3; its first column is used as the gene ID
    df  <- tibble(read_excel(tmp, sheet = 3))
    id0 <- df[[1]]
    up  <- geneid_to_uniprot(id0)

    # Gene IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(id0[no_up], extraids$entrez)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)                     # Get gene symbols
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    wang2006_wt <- tibble(symbol  = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez  = id,
                          name    = gname)

    # Cache for subsequent runs
    write.csv(wang2006_wt, path_wang2006_wt, row.names = FALSE)
}

# Library statistics (mapped genes)
wang2006_wtF <- wang2006_wt %>%
                group_by(uniprot) %>%
                slice(1) %>%                # Remove duplicate uniprot IDs
                ungroup() %>%
                filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_wang2006_wt  <- nrow(wang2006_wt)
n_wang2006_wtF <- nrow(wang2006_wtF)


# F508del-CFTR Interactome in BHK cells
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_wang2006_df)) {

    wang2006_df <- read_csv(path_wang2006_df)

} else {

    tmp <- tempfile()
    download.file(url = src_wang2006, destfile = tmp, mode = "wb")

    # Sheet 5; its first column is used as the gene ID
    df  <- tibble(read_excel(tmp, sheet = 5))
    id0 <- df[[1]]
    up  <- geneid_to_uniprot(id0)

    # Gene IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(id0[no_up], extraids$entrez)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)                     # Get gene symbols
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    wang2006_df <- tibble(symbol  = gene,
                          uniprot = up,
                          ensembl = ensg,
                          entrez  = id,
                          name    = gname)

    # Cache for subsequent runs
    write.csv(wang2006_df, path_wang2006_df, row.names = FALSE)
}

# Library statistics (mapped genes)
wang2006_dfF <- wang2006_df %>%
                group_by(uniprot) %>%
                slice(1) %>%                # Remove duplicate uniprot IDs
                ungroup() %>%
                filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_wang2006_df  <- nrow(wang2006_df)
n_wang2006_dfF <- nrow(wang2006_dfF)

wt-CFTR interactome (BHK):
* All data: 76 genes.
* After name updating: 76 genes.

F508del-CFTR interactome (BHK):
* All data: 46 genes.
* After name updating: 46 genes.

Primary traffic hits & Wang2006

Reilly interactome (2017)

Sci Rep (2017) 7(1):7642
Targeting the PI3K/Akt/mTOR signalling pathway in Cystic Fibrosis
R Reilly, M S Mroz, E Dempsey, K Wynne, S J Keely, E F McKone, C Hiebel, C Behl, J A Coppinger
https://doi.org/10.1038/s41598-017-06588-z

IP-MS profiling of CFTR-containing protein complexes immunoprecipitated from CFBE41o- and HBE41o- cells expressing F508del-CFTR and wt-CFTR, respectively.

# wt-CFTR Interactome
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_reilly2017_wt)) {

    reilly2017_wt <- read_csv(path_reilly2017_wt)

} else {

    tmp <- tempfile()
    download.file(url = src_reilly2017_wt, destfile = tmp, mode = "wb")

    # First spreadsheet row is skipped. The gene symbol is taken as the part of
    # `Accession` before its last underscore.
    # NOTE(review): presumably `Accession` looks like "<GENE>_<SPECIES>" --
    # confirm against the source table.
    df    <- tibble(read_excel(tmp, skip = 1)) %>%
             mutate(gene = sub("(.*)_.*$", "\\1", Accession))
    gene0 <- df$gene
    up    <- genesymbol_to_uniprot(gene0)

    # Gene symbols for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(gene0[no_up], extraids$symbol)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    reilly2017_wt <- tibble(symbol  = gene,
                            uniprot = up,
                            ensembl = ensg,
                            entrez  = id,
                            name    = gname)

    # Cache for subsequent runs
    write.csv(reilly2017_wt, path_reilly2017_wt, row.names = FALSE)
}

# Library statistics (mapped genes)
reilly2017_wtF <- reilly2017_wt %>%
                  group_by(uniprot) %>%
                  slice(1) %>%                # Remove duplicate uniprot IDs
                  ungroup() %>%
                  filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_reilly2017_wt  <- nrow(reilly2017_wt)
n_reilly2017_wtF <- nrow(reilly2017_wtF)


# F508del-CFTR Interactome
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_reilly2017_df)) {

    reilly2017_df <- read_csv(path_reilly2017_df)

} else {

    tmp <- tempfile()
    download.file(url = src_reilly2017_df, destfile = tmp, mode = "wb")

    # First spreadsheet row is skipped. The gene symbol is taken as the part of
    # `Accession` before its last underscore.
    # NOTE(review): presumably `Accession` looks like "<GENE>_<SPECIES>" --
    # confirm against the source table.
    df    <- tibble(read_excel(tmp, skip = 1)) %>%
             mutate(gene = sub("(.*)_.*$", "\\1", Accession))
    gene0 <- df$gene
    up    <- genesymbol_to_uniprot(gene0)

    # Gene symbols for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(gene0[no_up], extraids$symbol)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    reilly2017_df <- tibble(symbol  = gene,
                            uniprot = up,
                            ensembl = ensg,
                            entrez  = id,
                            name    = gname)

    # Cache for subsequent runs
    write.csv(reilly2017_df, path_reilly2017_df, row.names = FALSE)
}

# Library statistics (mapped genes)
reilly2017_dfF <- reilly2017_df %>%
                  group_by(uniprot) %>%
                  slice(1) %>%                # Remove duplicate uniprot IDs
                  ungroup() %>%
                  filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_reilly2017_df  <- nrow(reilly2017_df)
n_reilly2017_dfF <- nrow(reilly2017_dfF)

wt-CFTR interactome:
* All data: 504 genes.
* After name updating: 491 genes.

F508del-CFTR interactome:
* All data: 506 genes.
* After name updating: 478 genes.

Primary traffic hits & Reilly2017 (wt)

id symbol uniprot name
1 HELLS Q9NRZ9 Lymphoid-specific helicase
2 MYH14 Q7Z406 Myosin-14

Primary traffic hits & Reilly2017 (F508del)

id symbol uniprot name
1 ARHGEF2 Q92974 Rho guanine nucleotide exchange factor 2
2 COL5A1 P20908 Collagen alpha-1(V) chain
3 LAMB1 P07942 Laminin subunit beta-1
4 MGAT1 P26572 Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase
5 MYH14 Q7Z406 Myosin-14
6 PABPC3 Q9H361 Polyadenylate-binding protein 3

Primary traffic hits & Reilly2017 (wt & F508del)

id symbol uniprot name
1 MYH14 Q7Z406 Myosin-14

Gilchrist secretome (2006)

Cell (2006) 127(6):1265-1281
Quantitative Proteomics Analysis of the Secretory Pathway
Annalyn Gilchrist, Catherine E. Au, Johan Hiding, Alexander W. Bell, Julia Fernandez-Rodriguez, Souad Lesimple, Hisao Nagaya, Line Roy, Sara J.C. Gosline, Michael Hallett, Jacques Paiement, Robert E. Kearney, Tommy Nilsson, John J.M. Bergeron
https://doi.org/10.1016/j.cell.2006.10.036

Secretory pathway proteome, with information about the intracellular localization and relative presence of each protein in the rough and smooth ER, Golgi cisternae and Golgi-derived COPI vesicles.

I did not manage to analyze these data because I could not process the gene identifiers: NCBI gi numbers.

Pankow CFTR interactome and PTM modifications (2019)

Sci Signal (2019) 12(562):eaan7984
A posttranslational modification code for CFTR maturation is altered in cystic fibrosis
Sandra Pankow, Casimir Bamberger, John R Yates 3rd
https://doi.org/10.1126/scisignal.aan7984

wt-CFTR interactome upon treatment with CX-4945 (also known as silmitasertib, a competitive inhibitor of CK2alpha).

# wild-type CFTR interactome upon CX-4945 treatment
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_pankow2019)) {

    pankow2019 <- read_csv(path_pankow2019)

} else {

    tmp <- tempfile()
    download.file(url = src_pankow2019, destfile = tmp, mode = "wb")

    # Keep only rows flagged as CONCLUSIVE and strip any "Reverse_" text from
    # the accession column.
    # NOTE(review): a "Reverse_" prefix presumably marks decoy entries --
    # confirm that stripping (rather than dropping) those rows is intended.
    df <- tibble(read.xlsx(tmp)) %>%
          filter(Protein.evidence == "CONCLUSIVE") %>%
          rename(uniprot = `ACC(s)`) %>%
          mutate(uniprot = gsub("Reverse_", "", uniprot))
    up   <- df$uniprot
    gene <- uniprot_to_genesymbol(up)

    # Uniprot IDs for which a gene symbol could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$symbol[...]`) so
    # that `gene` stays a plain vector whether `extraids` is a data.frame or a
    # tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_gene       <- is.na(gene)
    gene[no_gene] <- extraids$symbol[match(up[no_gene], extraids$uniprot)]

    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    pankow2019 <- tibble(symbol  = gene,
                         uniprot = up,
                         ensembl = ensg,
                         entrez  = id,
                         name    = gname)

    # Cache for subsequent runs
    write.csv(pankow2019, path_pankow2019, row.names = FALSE)
}

# Library statistics (mapped genes)
pankow2019F <- pankow2019 %>%
               group_by(uniprot) %>%
               slice(1) %>%                # Remove duplicate uniprot IDs
               ungroup() %>%
               filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_pankow2019  <- nrow(pankow2019)
n_pankow2019F <- nrow(pankow2019F)

wt-CFTR interactome, CX-4945:
* All data: 2080 genes.
* After name updating: 2071 genes.

id symbol uniprot name
1 ARHGEF2 Q92974 Rho guanine nucleotide exchange factor 2
2 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome
3 ATP1B3 P54709 Sodium/potassium-transporting ATPase subunit beta-3
4 AURKA O14965 Aurora kinase A
5 BAG3 O95817 BAG family molecular chaperone regulator 3
6 CCT5 P48643 T-complex protein 1 subunit epsilon
7 CLK1 P49759 Dual specificity protein kinase CLK1
8 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
9 DNAJC8 O75937 DnaJ homolog subfamily C member 8
10 HELLS Q9NRZ9 Lymphoid-specific helicase
11 LAMA3 Q16787 Laminin subunit alpha-3
12 LAMB1 P07942 Laminin subunit beta-1
13 MICB Q29980 MHC class I polypeptide-related sequence B
14 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
15 MYH14 Q7Z406 Myosin-14
16 NRIP1 P48552 Nuclear receptor-interacting protein 1
17 OXSR1 O95747 Serine/threonine-protein kinase OSR1
18 PDE8A O60658 High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A
19 PSMA6 P60900 Proteasome subunit alpha type-6
20 PSMB1 P20618 Proteasome subunit beta type-1
21 RELA Q04206 Transcription factor p65
22 SLC34A1 Q06495 Sodium-dependent phosphate transport protein 2A
23 VPS26A O75436 Vacuolar protein sorting-associated protein 26A

Dang (2020)

PLoS One (2020) 15(11):e0239189
Mining GWAS and eQTL data for CF lung disease modifiers by gene expression imputation
Hong Dang, Deepika Polineni, Rhonda G Pace, Jaclyn R Stonebraker, Harriet Corvol, Garry R Cutting, Mitchell L Drumm, Lisa J Strug, Wanda K O’Neal, Michael R Knowles
https://doi.org/10.1371/journal.pone.0239189

Candidate modifier genes of CF lung disease. Obtained from an integrated analysis of expression data from CF cohorts and Genotype-Tissue Expression (GTEx) reference data sets from multiple human tissues to generate predictive models, which were used to impute transcriptional regulation from genetic variance in a GWAS population.

# lung disease modifier genes
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_dang2020)) {

    dang2020 <- read_csv(path_dang2020)

} else {

    tmp <- tempfile()
    download.file(url = src_dang2020, destfile = tmp, mode = "wb")

    df    <- tibble(read.xlsx(tmp))
    ensg0 <- df$ENSGID
    up    <- ensembl_to_uniprot(ensg0)

    # Ensembl IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(ensg0[no_up], extraids$ensembl)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)                     # Get gene symbols
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    dang2020 <- tibble(symbol  = gene,
                       uniprot = up,
                       ensembl = ensg,
                       entrez  = id,
                       name    = gname)

    # Cache for subsequent runs
    write.csv(dang2020, path_dang2020, row.names = FALSE)
}

# Library statistics (mapped genes)
dang2020F <- dang2020 %>%
             group_by(uniprot) %>%
             slice(1) %>%                # Remove duplicate uniprot IDs
             ungroup() %>%
             filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_dang2020  <- nrow(dang2020)
n_dang2020F <- nrow(dang2020F)

Primary traffic hits & Dang2020

id symbol uniprot name
1 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome
2 ATXN1 P54253 Ataxin-1
3 DNAJB7 Q7Z6W7 DnaJ homolog subfamily B member 7
4 FRK P42685 Tyrosine-protein kinase FRK
5 NRIP1 P48552 Nuclear receptor-interacting protein 1
6 TIMM10 P62072 Mitochondrial import inner membrane translocase subunit Tim10

Confirmed traffic hits & Dang2020

id symbol uniprot name
1 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome

Hodos (2020)

Sci Rep (2020) 10(1):20553
Integrative genomic meta‑analysis reveals novel molecular insights into cystic fibrosis and deltaF508‑CFTR rescue
Rachel A Hodos, Matthew D Strub, Shyam Ramachandran, Li Li, Paul B McCray Jr, Joel T Dudley
http://dx.doi.org/10.1038/s41598-020-76347-0

Meta-analysis of publicly available datasets. CFTR Gene Set Library comprising 60 gene sets with various associations to CF or CFTR.

This work is the largest CF transcriptomic meta-analysis to date, having the advantage of being a quantitative meta-analysis, combining fold-change estimates across studies.

A major output of this work is the CFTR Gene Set Library compiled from 60 gene sets from 34 publications, which should be of great interest to future genomic studies of CF and CFTR.

# Differentially expressed genes
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source workbook and cache the result.
if (file.exists(path_hodos2020_deg)) {

    hodos2020_deg <- read_csv(path_hodos2020_deg)

} else {

    tmp <- tempfile()
    download.file(url = src_hodos2020_deg, destfile = tmp, mode = "wb")

    # Read every sheet and tag its rows with the sheet name. `seq_along()` is
    # used instead of `1:length()` so an empty sheet list yields no iterations
    # rather than the indices 1 and 0.
    names_sh <- getSheetNames(tmp)
    df       <- lapply(seq_along(names_sh), function(x) {
                    temp <- read.xlsx(tmp, sheet = x)
                    temp$signature <- names_sh[x]
                    temp
                })
    df <- tibble(do.call(rbind, df))       # Stack all sheets (same columns required)

    id0 <- df$GeneID
    up  <- geneid_to_uniprot(id0)

    # Gene IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(id0[no_up], extraids$entrez)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    hodos2020_deg <- tibble(symbol    = gene,
                            uniprot   = up,
                            ensembl   = ensg,
                            entrez    = id,
                            name      = gname,
                            signature = df$signature)

    # Cache for subsequent runs
    write.csv(hodos2020_deg, path_hodos2020_deg, row.names = FALSE)
}

# Library statistics (mapped genes)
hodos2020_degF <- hodos2020_deg %>%
                  group_by(uniprot) %>%
                  slice(1) %>%                # Remove duplicate uniprot IDs
                  ungroup() %>%
                  filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_hodos2020_deg  <- nrow(hodos2020_deg)
n_hodos2020_degF <- nrow(hodos2020_degF)



# Core Gene Signatures
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source workbook and cache the result.
if (file.exists(path_hodos2020_cosig)) {

    hodos2020_cosig <- read_csv(path_hodos2020_cosig)

} else {

    tmp <- tempfile()
    download.file(url = src_hodos2020_cosig, destfile = tmp, mode = "wb")

    # Read every sheet, keep only the Symbol and GeneID columns, and tag the
    # rows with the sheet name. `seq_along()` is used instead of `1:length()`
    # so an empty sheet list yields no iterations rather than the indices 1
    # and 0.
    names_sh <- getSheetNames(tmp)
    df       <- lapply(seq_along(names_sh), function(x) {
                    temp <- read.xlsx(tmp, sheet = x)
                    temp <- temp[, c("Symbol", "GeneID")]
                    temp$signature <- names_sh[x]
                    temp
                })
    df <- tibble(do.call(rbind, df))       # Stack all sheets

    id0 <- df$GeneID
    up  <- geneid_to_uniprot(id0)

    # Gene IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(id0[no_up], extraids$entrez)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    hodos2020_cosig <- tibble(symbol    = gene,
                              uniprot   = up,
                              ensembl   = ensg,
                              entrez    = id,
                              name      = gname,
                              signature = df$signature)

    # Cache for subsequent runs
    write.csv(hodos2020_cosig, path_hodos2020_cosig, row.names = FALSE)
}

# Library statistics (mapped genes)
hodos2020_cosigF <- hodos2020_cosig %>%
                    group_by(uniprot) %>%
                    slice(1) %>%                # Remove duplicate uniprot IDs
                    ungroup() %>%
                    filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_hodos2020_cosig  <- nrow(hodos2020_cosig)
n_hodos2020_cosigF <- nrow(hodos2020_cosigF)



# Gene Signatures
# Get data: reuse the cached table when it exists; otherwise rebuild it from
# the source spreadsheet and cache the result.
if (file.exists(path_hodos2020_allsig)) {

    hodos2020_allsig <- read_csv(path_hodos2020_allsig)

} else {

    tmp <- tempfile()
    download.file(url = src_hodos2020_allsig, destfile = tmp, mode = "wb")

    # Keep the gene-set name and its ", "-separated Entrez ID list, then expand
    # the list to one row per (gene set, Entrez ID) pair
    df <- tibble(read_excel(tmp)) %>%
          select(gene_set = `Gene Set`,
                 entrez   = `Entrez IDs`) %>%
          separate_rows(entrez, sep = ", ")

    id0 <- df$entrez
    up  <- geneid_to_uniprot(id0)

    # Gene IDs for which a Uniprot ID could not be found are looked up in
    # `extraids`. The column is extracted as a plain vector (`$uniprot[...]`)
    # so that `up` stays a plain vector whether `extraids` is a data.frame or
    # a tibble (a tibble's `[i, "col"]` returns a one-column tibble).
    no_up     <- is.na(up)
    up[no_up] <- extraids$uniprot[match(id0[no_up], extraids$entrez)]

    # All other identifiers are derived from the completed Uniprot IDs
    gene  <- uniprot_to_genesymbol(up)
    ensg  <- uniprot_to_ensembl(up)
    id    <- uniprot_to_geneid(up)
    gname <- uniprot_to_genename(up, parallelize = TRUE)

    hodos2020_allsig <- tibble(symbol  = gene,
                               uniprot = up,
                               ensembl = ensg,
                               entrez  = id,
                               name    = gname,
                               geneset = df$gene_set)

    # Cache for subsequent runs
    write.csv(hodos2020_allsig, path_hodos2020_allsig, row.names = FALSE)
}

# Library statistics (mapped genes)
hodos2020_allsigF <- hodos2020_allsig %>%
                     group_by(uniprot) %>%
                     slice(1) %>%                # Remove duplicate uniprot IDs
                     ungroup() %>%
                     filter(!is.na(uniprot))     # Remove unknowns

# Row counts before and after filtering
n_hodos2020_allsig  <- nrow(hodos2020_allsig)
n_hodos2020_allsigF <- nrow(hodos2020_allsigF)

CF differentially expressed genes:
* All data: 14371 genes.
* After name updating and excluding non-protein coding genes: 7286 genes.

Core Gene Signatures:
* All data: 88972 genes.
* After name updating and excluding non-protein coding genes: 18712 genes.

All Gene Signatures:
* All data: 10965 genes.
* After name updating and excluding non-protein coding genes: 6459 genes.

Primary traffic hits & Hodos2020 DEG

id symbol uniprot name
1 ABR Q12979 Active breakpoint cluster region-related protein
2 ACOT4 Q8N9L9 Peroxisomal succinyl-coenzyme A thioesterase
3 ADAMTS6 Q9UKP5 A disintegrin and metalloproteinase with thrombospondin motifs 6
4 APOBEC3A P31941 DNA dC->dU-editing enzyme APOBEC-3A
5 ARL4D P49703 ADP-ribosylation factor-like protein 4D
6 ATP1B3 P54709 Sodium/potassium-transporting ATPase subunit beta-3
7 ATXN1 P54253 Ataxin-1
8 C3AR1 Q16581 C3a anaphylatoxin chemotactic receptor
9 CCR8 P51685 C-C chemokine receptor type 8
10 CCT5 P48643 T-complex protein 1 subunit epsilon
11 CDH2 P19022 Cadherin-2
12 CITED2 Q99967 Cbp/p300-interacting transactivator 2
13 CLK1 P49759 Dual specificity protein kinase CLK1
14 DAB2 P98082 Disabled homolog 2
15 DCP1A Q9NPI6 mRNA-decapping enzyme 1A
16 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
17 DRD4 P21917 D(4) dopamine receptor
18 EIF4G3 O43432 Eukaryotic translation initiation factor 4 gamma 3
19 ETV4 P43268 ETS translocation variant 4
20 EYA3 Q99504 Eyes absent homolog 3
21 F5 P12259 Coagulation factor V
22 FER P16591 Tyrosine-protein kinase Fer
23 FGD6 Q6ZV73 FYVE, RhoGEF and PH domain-containing protein 6
24 FHL3 Q13643 Four and a half LIM domains protein 3
25 GALK2 Q01415 N-acetylgalactosamine kinase
26 GJB2 P29033 Gap junction beta-2 protein
27 GLP1R P43220 Glucagon-like peptide 1 receptor
28 GNG4 P50150 Guanine nucleotide-binding protein G(I)/G(S)/G(O) subunit gamma-4
29 GPR162 Q16538 Probable G-protein coupled receptor 162
30 GZF1 Q9H116 GDNF-inducible zinc finger protein 1
31 HS2ST1 Q7LGA3 Heparan sulfate 2-O-sulfotransferase 1
32 ITPK1 Q13572 Inositol-tetrakisphosphate 1-kinase
33 JAG2 Q9Y219 Protein jagged-2
34 KLF15 Q9UIH9 Krueppel-like factor 15
35 LAMB2 P55268 Laminin subunit beta-2
36 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
37 LMO2 P25791 Rhombotin-2
38 LMO3 Q8TAP4 LIM domain only protein 3
39 MAK P20794 Serine/threonine-protein kinase MAK
40 MC5R P33032 Melanocortin receptor 5
41 MFHAS1 Q9Y4C4 Malignant fibrous histiocytoma-amplified sequence 1
42 MGAT1 P26572 Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase
43 MICB Q29980 MHC class I polypeptide-related sequence B
44 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
45 MYH14 Q7Z406 Myosin-14
46 NCBP2 P52298 Nuclear cap-binding protein subunit 2
47 NDUFB2 O95178 NADH dehydrogenase [ubiquinone] 1 beta subcomplex subunit 2, mitochondrial
48 NFIL3 Q16649 Nuclear factor interleukin-3-regulated protein
49 NRDC O43847 Nardilysin
50 NRIP1 P48552 Nuclear receptor-interacting protein 1
51 NTNG2 Q96CW9 Netrin-G2
52 OXSR1 O95747 Serine/threonine-protein kinase OSR1
53 PAFAH2 Q99487 Platelet-activating factor acetylhydrolase 2, cytoplasmic
54 PARG Q86W56 Poly(ADP-ribose) glycohydrolase
55 PCDH17 O14917 Protocadherin-17
56 PCDH19 Q8TAB3 Protocadherin-19
57 PDE8A O60658 High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A
58 PKMYT1 Q99640 Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase
59 PLK2 Q9NYY3 Serine/threonine-protein kinase PLK2
60 PPP1R3D O95685 Protein phosphatase 1 regulatory subunit 3D
61 PRICKLE1 Q96MT3 Prickle-like protein 1
62 PRSS12 P56730 Neurotrypsin
63 PTER Q96BW5 Phosphotriesterase-related protein
64 PTGER4 P35408 Prostaglandin E2 receptor EP4 subtype
65 PTOV1 Q86YD1 Prostate tumor-overexpressed gene 1 protein
66 PTPRJ Q12913 Receptor-type tyrosine-protein phosphatase eta
67 RAB4B P61018 Ras-related protein Rab-4B
68 RB1 P06400 Retinoblastoma-associated protein
69 RNASE3 P12724 Eosinophil cationic protein
70 SCPEP1 Q9HB40 Retinoid-inducible serine carboxypeptidase
71 SEMA6B Q9H3T3 Semaphorin-6B
72 SGPP1 Q9BX95 Sphingosine-1-phosphate phosphatase 1
73 SIPA1 Q96FS4 Signal-induced proliferation-associated protein 1
74 SIX4 Q9UIU6 Homeobox protein SIX4
75 SLC25A37 Q9NYZ2 Mitoferrin-1
76 SLC2A13 Q96QE2 Proton myo-inositol cotransporter
77 SLC30A1 Q9Y6M5 Zinc transporter 1
78 SREBF1 P36956 Sterol regulatory element-binding protein 1
79 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
80 SYNJ2 O15056 Synaptojanin-2
81 TEKT2 Q9UIF3 Tektin-2
82 TRUB1 Q8WWH5 Probable tRNA pseudouridine synthase 1
83 TXNDC9 O14530 Thioredoxin domain-containing protein 9
84 UQCRC1 P31930 Cytochrome b-c1 complex subunit 1, mitochondrial
85 VWF P04275 von Willebrand factor
86 ZNF16 P17020 Zinc finger protein 16
87 ZNF438 Q7Z4V0 Zinc finger protein 438
88 ZNF616 Q08AN1 Zinc finger protein 616
89 ZNF662 Q6ZS27 Zinc finger protein 662
90 ZNF692 Q9BU19 Zinc finger protein 692

Confirmed traffic hits & Hodos2020 DEG

id symbol uniprot name
1 ADAMTS6 Q9UKP5 A disintegrin and metalloproteinase with thrombospondin motifs 6
2 CITED2 Q99967 Cbp/p300-interacting transactivator 2
3 GJB2 P29033 Gap junction beta-2 protein
4 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
5 NTNG2 Q96CW9 Netrin-G2
6 OXSR1 O95747 Serine/threonine-protein kinase OSR1
7 SLC30A1 Q9Y6M5 Zinc transporter 1
8 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1

Primary traffic hits & Hodos2020 core gene signatures

id symbol uniprot name
1 ABCB4 P21439 Phosphatidylcholine translocator ABCB4
2 ABR Q12979 Active breakpoint cluster region-related protein
3 ACOT4 Q8N9L9 Peroxisomal succinyl-coenzyme A thioesterase
4 ACSBG2 Q5FVE4 Long-chain-fatty-acid–CoA ligase ACSBG2
5 ACTR8 Q9H981 Actin-related protein 8
6 ADAMTS6 Q9UKP5 A disintegrin and metalloproteinase with thrombospondin motifs 6
7 ADCY10 Q96PN6 Adenylate cyclase type 10
8 ADORA1 P30542 Adenosine receptor A1
9 AKT2 P31751 RAC-beta serine/threonine-protein kinase
10 APOA5 Q6Q788 Apolipoprotein A-V
11 APOB P04114 Apolipoprotein B-100
12 APOBEC1 P41238 C->U-editing enzyme APOBEC-1
13 APOBEC3A P31941 DNA dC->dU-editing enzyme APOBEC-3A
14 ARHGEF2 Q92974 Rho guanine nucleotide exchange factor 2
15 ARL4D P49703 ADP-ribosylation factor-like protein 4D
16 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome
17 ASMT P46597 Acetylserotonin O-methyltransferase
18 ATP1B3 P54709 Sodium/potassium-transporting ATPase subunit beta-3
19 ATXN1 P54253 Ataxin-1
20 AURKA O14965 Aurora kinase A
21 BAG3 O95817 BAG family molecular chaperone regulator 3
22 BANK1 Q8NDB2 B-cell scaffold protein with ankyrin repeats
23 BCL6B Q8N143 B-cell CLL/lymphoma 6 member B protein
24 BLK P51451 Tyrosine-protein kinase Blk
25 C3AR1 Q16581 C3a anaphylatoxin chemotactic receptor
26 CACNA1C Q13936 Voltage-dependent L-type calcium channel subunit alpha-1C
27 CAPN12 Q6ZSI9 Calpain-12
28 CCL17 Q92583 C-C motif chemokine 17
29 CCL27 Q9Y4X3 C-C motif chemokine 27
30 CCR8 P51685 C-C chemokine receptor type 8
31 CCT5 P48643 T-complex protein 1 subunit epsilon
32 CCT8L2 Q96SF2 T-complex protein 1 subunit theta-like 2
33 CDH2 P19022 Cadherin-2
34 CHRM1 P11229 Muscarinic acetylcholine receptor M1
35 CITED2 Q99967 Cbp/p300-interacting transactivator 2
36 CLDN4 O14493 Claudin-4
37 CLK1 P49759 Dual specificity protein kinase CLK1
38 CNKSR1 Q969H4 Connector enhancer of kinase suppressor of ras 1
39 CNR2 P34972 Cannabinoid receptor 2
40 COL5A1 P20908 Collagen alpha-1(V) chain
41 CPZ Q66K79 Carboxypeptidase Z
42 CREBBP Q92793 CREB-binding protein
43 CRX O43186 Cone-rod homeobox protein
44 CUBN O60494 Cubilin
45 CUL5 Q93034 Cullin-5
46 CYP19A1 P11511 Aromatase
47 CYSLTR2 Q9NS75 Cysteinyl leukotriene receptor 2
48 DAB2 P98082 Disabled homolog 2
49 DCP1A Q9NPI6 mRNA-decapping enzyme 1A
50 DCSTAMP Q9H295 Dendritic cell-specific transmembrane protein
51 DDR2 Q16832 Discoidin domain-containing receptor 2
52 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
53 DGKG P49619 Diacylglycerol kinase gamma
54 DGUOK Q16854 Deoxyguanosine kinase, mitochondrial
55 DNAJB7 Q7Z6W7 DnaJ homolog subfamily B member 7
56 DNAJC8 O75937 DnaJ homolog subfamily C member 8
57 DPEP2 Q9H4A9 Dipeptidase 2
58 DRD4 P21917 D(4) dopamine receptor
59 DRD5 P21918 D(1B) dopamine receptor
60 DYRK3 O43781 Dual specificity tyrosine-phosphorylation-regulated kinase 3
61 EIF4G3 O43432 Eukaryotic translation initiation factor 4 gamma 3
62 EMILIN2 Q9BXX0 EMILIN-2
63 EPN3 Q9H201 Epsin-3
64 EPS15L1 Q9UBC2 Epidermal growth factor receptor substrate 15-like 1
65 ETV4 P43268 ETS translocation variant 4
66 EVI5L Q96CN4 EVI5-like protein
67 EYA3 Q99504 Eyes absent homolog 3
68 EZH1 Q92800 Histone-lysine N-methyltransferase EZH1
69 F5 P12259 Coagulation factor V
70 FER P16591 Tyrosine-protein kinase Fer
71 FGD6 Q6ZV73 FYVE, RhoGEF and PH domain-containing protein 6
72 FGL1 Q08830 Fibrinogen-like protein 1
73 FHL3 Q13643 Four and a half LIM domains protein 3
74 FOLR2 P14207 Folate receptor beta
75 FRK P42685 Tyrosine-protein kinase FRK
76 FSD1 Q9BTV5 Fibronectin type III and SPRY domain-containing protein 1
77 GALK2 Q01415 N-acetylgalactosamine kinase
78 GALNS P34059 N-acetylgalactosamine-6-sulfatase
79 GDPD5 Q8WTR4 Glycerophosphodiester phosphodiesterase domain-containing protein 5
80 GJA8 P48165 Gap junction alpha-8 protein
81 GJB2 P29033 Gap junction beta-2 protein
82 GLP1R P43220 Glucagon-like peptide 1 receptor
83 GNG4 P50150 Guanine nucleotide-binding protein G(I)/G(S)/G(O) subunit gamma-4
84 GOSR2 O14653 Golgi SNAP receptor complex member 2
85 GPR132 Q9UNW8 Probable G-protein coupled receptor 132
86 GPR162 Q16538 Probable G-protein coupled receptor 162
87 GRK3 P35626 Beta-adrenergic receptor kinase 2
88 GRK5 P34947 G protein-coupled receptor kinase 5
89 GUCY2D Q02846 Retinal guanylyl cyclase 1
90 GZF1 Q9H116 GDNF-inducible zinc finger protein 1
91 HELLS Q9NRZ9 Lymphoid-specific helicase
92 HOXD10 P28358 Homeobox protein Hox-D10
93 HS2ST1 Q7LGA3 Heparan sulfate 2-O-sulfotransferase 1
94 HTR1E P28566 5-hydroxytryptamine receptor 1E
95 IL24 Q13007 Interleukin-24
96 ISL2 Q96A47 Insulin gene enhancer protein ISL-2
97 ITGA11 Q9UKX5 Integrin alpha-11
98 ITPK1 Q13572 Inositol-tetrakisphosphate 1-kinase
99 JAG2 Q9Y219 Protein jagged-2
100 KCNIP2 Q9NS61 Kv channel-interacting protein 2
101 KCNK10 P57789 Potassium channel subfamily K member 10
102 KIF17 Q9P2E2 Kinesin-like protein KIF17
103 KLF15 Q9UIH9 Krueppel-like factor 15
104 KLK7 P49862 Kallikrein-7
105 LAMA3 Q16787 Laminin subunit alpha-3
106 LAMB1 P07942 Laminin subunit beta-1
107 LAMB2 P55268 Laminin subunit beta-2
108 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
109 LEPROT O15243 Leptin receptor gene-related protein
110 LIN9 Q5TKA1 Protein lin-9 homolog
111 LMO2 P25791 Rhombotin-2
112 LMO3 Q8TAP4 LIM domain only protein 3
113 LMTK3 Q96Q04 Serine/threonine-protein kinase LMTK3
114 LRP1B Q9NZR2 Low-density lipoprotein receptor-related protein 1B
115 LRRK1 Q38SD2 Leucine-rich repeat serine/threonine-protein kinase 1
116 MAK P20794 Serine/threonine-protein kinase MAK
117 MC5R P33032 Melanocortin receptor 5
118 MFHAS1 Q9Y4C4 Malignant fibrous histiocytoma-amplified sequence 1
119 MGAT1 P26572 Alpha-1,3-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase
120 MICB Q29980 MHC class I polypeptide-related sequence B
121 MIER1 Q8N108 Mesoderm induction early response protein 1
122 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
123 MOB3B Q86TA1 MOB kinase activator 3B
124 MPST P25325 3-mercaptopyruvate sulfurtransferase
125 MYH14 Q7Z406 Myosin-14
126 NAALADL1 Q9UQQ1 Aminopeptidase NAALADL1
127 NCBP2 P52298 Nuclear cap-binding protein subunit 2
128 NDUFB2 O95178 NADH dehydrogenase [ubiquinone] 1 beta subcomplex subunit 2, mitochondrial
129 NFIL3 Q16649 Nuclear factor interleukin-3-regulated protein
130 NOVA1 P51513 RNA-binding protein Nova-1
131 NR2E1 Q9Y466 Nuclear receptor subfamily 2 group E member 1
132 NRDC O43847 Nardilysin
133 NRIP1 P48552 Nuclear receptor-interacting protein 1
134 NTNG2 Q96CW9 Netrin-G2
135 OXSR1 O95747 Serine/threonine-protein kinase OSR1
136 PABPC3 Q9H361 Polyadenylate-binding protein 3
137 PAFAH2 Q99487 Platelet-activating factor acetylhydrolase 2, cytoplasmic
138 PAK6 Q9NQU5 Serine/threonine-protein kinase PAK 6
139 PAPOLB Q9NRJ5 Poly(A) polymerase beta
140 PARD3 Q8TEW0 Partitioning defective 3 homolog
141 PARG Q86W56 Poly(ADP-ribose) glycohydrolase
142 PCDH17 O14917 Protocadherin-17
143 PCDH19 Q8TAB3 Protocadherin-19
144 PCDHB2 Q9Y5E7 Protocadherin beta-2
145 PCDHB8 Q9UN66 Protocadherin beta-8
146 PCSK1 P29120 Neuroendocrine convertase 1
147 PDE8A O60658 High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A
148 PHKG1 Q16816 Phosphorylase b kinase gamma catalytic chain, skeletal muscle/heart isoform
149 PIK3R5 Q8WYR1 Phosphoinositide 3-kinase regulatory subunit 5
150 PKMYT1 Q99640 Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase
151 PLA2G12A Q9BZM1 Group XIIA secretory phospholipase A2
152 PLCZ1 Q86YW0 1-phosphatidylinositol 4,5-bisphosphate phosphodiesterase zeta-1
153 PLK2 Q9NYY3 Serine/threonine-protein kinase PLK2
154 PMP2 P02689 Myelin P2 protein
155 PPP1R1B Q9UD71 Protein phosphatase 1 regulatory subunit 1B
156 PPP1R3D O95685 Protein phosphatase 1 regulatory subunit 3D
157 PRICKLE1 Q96MT3 Prickle-like protein 1
158 PRSS12 P56730 Neurotrypsin
159 PSMA6 P60900 Proteasome subunit alpha type-6
160 PSMB1 P20618 Proteasome subunit beta type-1
161 PTER Q96BW5 Phosphotriesterase-related protein
162 PTGER4 P35408 Prostaglandin E2 receptor EP4 subtype
163 PTOV1 Q86YD1 Prostate tumor-overexpressed gene 1 protein
164 PTPRJ Q12913 Receptor-type tyrosine-protein phosphatase eta
165 QRSL1 Q9H0R6 Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial
166 RAB4B P61018 Ras-related protein Rab-4B
167 RANBP9 Q96S59 Ran-binding protein 9
168 RB1 P06400 Retinoblastoma-associated protein
169 RECQL5 O94762 ATP-dependent DNA helicase Q5
170 RELA Q04206 Transcription factor p65
171 RHCE P18577 Blood group Rh(CE) polypeptide
172 RNASE3 P12724 Eosinophil cationic protein
173 SCPEP1 Q9HB40 Retinoid-inducible serine carboxypeptidase
174 SELE P16581 E-selectin
175 SEMA6B Q9H3T3 Semaphorin-6B
176 SGPP1 Q9BX95 Sphingosine-1-phosphate phosphatase 1
177 SIPA1 Q96FS4 Signal-induced proliferation-associated protein 1
178 SIX4 Q9UIU6 Homeobox protein SIX4
179 SLC13A1 Q9BZW2 Solute carrier family 13 member 1
180 SLC14A1 Q13336 Urea transporter 1
181 SLC17A1 Q14916 Sodium-dependent phosphate transport protein 1
182 SLC25A37 Q9NYZ2 Mitoferrin-1
183 SLC2A13 Q96QE2 Proton myo-inositol cotransporter
184 SLC30A1 Q9Y6M5 Zinc transporter 1
185 SLC34A1 Q06495 Sodium-dependent phosphate transport protein 2A
186 SLC37A4 O43826 Glucose-6-phosphate exchanger SLC37A4
187 SLC39A1 Q9NY26 Zinc transporter ZIP1
188 SLC52A2 Q9HAB3 Solute carrier family 52, riboflavin transporter, member 2
189 SLC5A6 Q9Y289 Sodium-dependent multivitamin transporter
190 SLC6A15 Q9H2J7 Sodium-dependent neutral amino acid transporter B(0)AT2
191 SLC9A4 Q6AI14 Sodium/hydrogen exchanger 4
192 SNTA1 Q13424 Alpha-1-syntrophin
193 SNX6 Q9UNH7 Sorting nexin-6
194 SREBF1 P36956 Sterol regulatory element-binding protein 1
195 STK32C Q86UX6 Serine/threonine-protein kinase 32C
196 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
197 SYNJ2 O15056 Synaptojanin-2
198 TEKT2 Q9UIF3 Tektin-2
199 TIMM10 P62072 Mitochondrial import inner membrane translocase subunit Tim10
200 TPCN1 Q9ULQ1 Two pore calcium channel protein 1
201 TPK1 Q9H3S4 Thiamin pyrophosphokinase 1
202 TRIM68 Q6AZZ1 E3 ubiquitin-protein ligase TRIM68
203 TRUB1 Q8WWH5 Probable tRNA pseudouridine synthase 1
204 TSC22D2 O75157 TSC22 domain family protein 2
205 TXNDC9 O14530 Thioredoxin domain-containing protein 9
206 UBE2I P63279 SUMO-conjugating enzyme UBC9
207 UNC5B Q8IZJ1 Netrin receptor UNC5B
208 UQCRC1 P31930 Cytochrome b-c1 complex subunit 1, mitochondrial
209 UQCRFS1 P47985 Cytochrome b-c1 complex subunit Rieske, mitochondrial
210 UROD P06132 Uroporphyrinogen decarboxylase
211 VN1R2 Q8NFZ6 Vomeronasal type-1 receptor 2
212 VPS26A O75436 Vacuolar protein sorting-associated protein 26A
213 VWF P04275 von Willebrand factor
214 ZBTB25 P24278 Zinc finger and BTB domain-containing protein 25
215 ZBTB48 P10074 Telomere zinc finger-associated protein
216 ZNF141 Q15928 Zinc finger protein 141
217 ZNF16 P17020 Zinc finger protein 16
218 ZNF19 P17023 Zinc finger protein 19
219 ZNF219 Q9P2Y4 Zinc finger protein 219
220 ZNF384 Q8TF68 Zinc finger protein 384
221 ZNF438 Q7Z4V0 Zinc finger protein 438
222 ZNF565 Q8N9K5 Zinc finger protein 565
223 ZNF616 Q08AN1 Zinc finger protein 616
224 ZNF662 Q6ZS27 Zinc finger protein 662
225 ZNF678 Q5SXM1 Zinc finger protein 678
226 ZNF692 Q9BU19 Zinc finger protein 692

Confirmed traffic hits & Hodos2020 core gene signatures

id symbol uniprot name
1 ACSBG2 Q5FVE4 Long-chain-fatty-acid–CoA ligase ACSBG2
2 ADAMTS6 Q9UKP5 A disintegrin and metalloproteinase with thrombospondin motifs 6
3 APOB P04114 Apolipoprotein B-100
4 ARVCF O00192 Armadillo repeat protein deleted in velo-cardio-facial syndrome
5 CCL27 Q9Y4X3 C-C motif chemokine 27
6 CITED2 Q99967 Cbp/p300-interacting transactivator 2
7 CLDN4 O14493 Claudin-4
8 COL5A1 P20908 Collagen alpha-1(V) chain
9 CPZ Q66K79 Carboxypeptidase Z
10 CREBBP Q92793 CREB-binding protein
11 CYSLTR2 Q9NS75 Cysteinyl leukotriene receptor 2
12 DCSTAMP Q9H295 Dendritic cell-specific transmembrane protein
13 EVI5L Q96CN4 EVI5-like protein
14 FOLR2 P14207 Folate receptor beta
15 GJB2 P29033 Gap junction beta-2 protein
16 GUCY2D Q02846 Retinal guanylyl cyclase 1
17 HELLS Q9NRZ9 Lymphoid-specific helicase
18 IL24 Q13007 Interleukin-24
19 ISL2 Q96A47 Insulin gene enhancer protein ISL-2
20 KIF17 Q9P2E2 Kinesin-like protein KIF17
21 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
22 LIN9 Q5TKA1 Protein lin-9 homolog
23 LRRK1 Q38SD2 Leucine-rich repeat serine/threonine-protein kinase 1
24 NOVA1 P51513 RNA-binding protein Nova-1
25 NTNG2 Q96CW9 Netrin-G2
26 OXSR1 O95747 Serine/threonine-protein kinase OSR1
27 PCDHB2 Q9Y5E7 Protocadherin beta-2
28 QRSL1 Q9H0R6 Glutamyl-tRNA(Gln) amidotransferase subunit A, mitochondrial
29 RECQL5 O94762 ATP-dependent DNA helicase Q5
30 SLC30A1 Q9Y6M5 Zinc transporter 1
31 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
32 TPK1 Q9H3S4 Thiamin pyrophosphokinase 1
33 VPS26A O75436 Vacuolar protein sorting-associated protein 26A
34 ZNF141 Q15928 Zinc finger protein 141
35 ZNF384 Q8TF68 Zinc finger protein 384

Primary traffic hits & Hodos2020 all gene signatures

id symbol uniprot name
1 ABCB4 P21439 Phosphatidylcholine translocator ABCB4
2 ABR Q12979 Active breakpoint cluster region-related protein
3 ADORA1 P30542 Adenosine receptor A1
4 AKT2 P31751 RAC-beta serine/threonine-protein kinase
5 ASMT P46597 Acetylserotonin O-methyltransferase
6 ATP1B3 P54709 Sodium/potassium-transporting ATPase subunit beta-3
7 AURKA O14965 Aurora kinase A
8 BAG3 O95817 BAG family molecular chaperone regulator 3
9 BANK1 Q8NDB2 B-cell scaffold protein with ankyrin repeats
10 BCL6B Q8N143 B-cell CLL/lymphoma 6 member B protein
11 BLK P51451 Tyrosine-protein kinase Blk
12 C3AR1 Q16581 C3a anaphylatoxin chemotactic receptor
13 CACNA1C Q13936 Voltage-dependent L-type calcium channel subunit alpha-1C
14 CCL27 Q9Y4X3 C-C motif chemokine 27
15 CCR8 P51685 C-C chemokine receptor type 8
16 CCT5 P48643 T-complex protein 1 subunit epsilon
17 CDH2 P19022 Cadherin-2
18 CHRM1 P11229 Muscarinic acetylcholine receptor M1
19 CLDN4 O14493 Claudin-4
20 CLK1 P49759 Dual specificity protein kinase CLK1
21 CNR2 P34972 Cannabinoid receptor 2
22 COL5A1 P20908 Collagen alpha-1(V) chain
23 CUL5 Q93034 Cullin-5
24 CYSLTR2 Q9NS75 Cysteinyl leukotriene receptor 2
25 DAB2 P98082 Disabled homolog 2
26 DDR2 Q16832 Discoidin domain-containing receptor 2
27 DDX5 P17844 Probable ATP-dependent RNA helicase DDX5
28 DGKG P49619 Diacylglycerol kinase gamma
29 DGUOK Q16854 Deoxyguanosine kinase, mitochondrial
30 DPEP2 Q9H4A9 Dipeptidase 2
31 DRD4 P21917 D(4) dopamine receptor
32 DRD5 P21918 D(1B) dopamine receptor
33 DYRK3 O43781 Dual specificity tyrosine-phosphorylation-regulated kinase 3
34 EPN3 Q9H201 Epsin-3
35 EVI5L Q96CN4 EVI5-like protein
36 EZH1 Q92800 Histone-lysine N-methyltransferase EZH1
37 F5 P12259 Coagulation factor V
38 FER P16591 Tyrosine-protein kinase Fer
39 FGL1 Q08830 Fibrinogen-like protein 1
40 FOLR2 P14207 Folate receptor beta
41 FRK P42685 Tyrosine-protein kinase FRK
42 FSD1 Q9BTV5 Fibronectin type III and SPRY domain-containing protein 1
43 GALK2 Q01415 N-acetylgalactosamine kinase
44 GALNS P34059 N-acetylgalactosamine-6-sulfatase
45 GDPD5 Q8WTR4 Glycerophosphodiester phosphodiesterase domain-containing protein 5
46 GJA8 P48165 Gap junction alpha-8 protein
47 GJB2 P29033 Gap junction beta-2 protein
48 GLP1R P43220 Glucagon-like peptide 1 receptor
49 GOSR2 O14653 Golgi SNAP receptor complex member 2
50 GPR132 Q9UNW8 Probable G-protein coupled receptor 132
51 GPR162 Q16538 Probable G-protein coupled receptor 162
52 GRK3 P35626 Beta-adrenergic receptor kinase 2
53 GRK5 P34947 G protein-coupled receptor kinase 5
54 GUCY2D Q02846 Retinal guanylyl cyclase 1
55 HS2ST1 Q7LGA3 Heparan sulfate 2-O-sulfotransferase 1
56 HTR1E P28566 5-hydroxytryptamine receptor 1E
57 IL24 Q13007 Interleukin-24
58 ITGA11 Q9UKX5 Integrin alpha-11
59 ITPK1 Q13572 Inositol-tetrakisphosphate 1-kinase
60 JAG2 Q9Y219 Protein jagged-2
61 KCNIP2 Q9NS61 Kv channel-interacting protein 2
62 KCNK10 P57789 Potassium channel subfamily K member 10
63 LAMA3 Q16787 Laminin subunit alpha-3
64 LAMB1 P07942 Laminin subunit beta-1
65 LAMB2 P55268 Laminin subunit beta-2
66 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
67 LEPROT O15243 Leptin receptor gene-related protein
68 LMTK3 Q96Q04 Serine/threonine-protein kinase LMTK3
69 LRP1B Q9NZR2 Low-density lipoprotein receptor-related protein 1B
70 LRRK1 Q38SD2 Leucine-rich repeat serine/threonine-protein kinase 1
71 MAK P20794 Serine/threonine-protein kinase MAK
72 MC5R P33032 Melanocortin receptor 5
73 MFHAS1 Q9Y4C4 Malignant fibrous histiocytoma-amplified sequence 1
74 MMS19 Q96T76 MMS19 nucleotide excision repair protein homolog
75 MPST P25325 3-mercaptopyruvate sulfurtransferase
76 MYH14 Q7Z406 Myosin-14
77 NAALADL1 Q9UQQ1 Aminopeptidase NAALADL1
78 NCBP2 P52298 Nuclear cap-binding protein subunit 2
79 NOVA1 P51513 RNA-binding protein Nova-1
80 NR2E1 Q9Y466 Nuclear receptor subfamily 2 group E member 1
81 NRDC O43847 Nardilysin
82 NTNG2 Q96CW9 Netrin-G2
83 OXSR1 O95747 Serine/threonine-protein kinase OSR1
84 PABPC3 Q9H361 Polyadenylate-binding protein 3
85 PAFAH2 Q99487 Platelet-activating factor acetylhydrolase 2, cytoplasmic
86 PAK6 Q9NQU5 Serine/threonine-protein kinase PAK 6
87 PARD3 Q8TEW0 Partitioning defective 3 homolog
88 PARG Q86W56 Poly(ADP-ribose) glycohydrolase
89 PCDH17 O14917 Protocadherin-17
90 PCDH19 Q8TAB3 Protocadherin-19
91 PCDHB2 Q9Y5E7 Protocadherin beta-2
92 PCDHB8 Q9UN66 Protocadherin beta-8
93 PDE8A O60658 High affinity cAMP-specific and IBMX-insensitive 3’,5’-cyclic phosphodiesterase 8A
94 PHKG1 Q16816 Phosphorylase b kinase gamma catalytic chain, skeletal muscle/heart isoform
95 PKMYT1 Q99640 Membrane-associated tyrosine- and threonine-specific cdc2-inhibitory kinase
96 PLA2G12A Q9BZM1 Group XIIA secretory phospholipase A2
97 PLCZ1 Q86YW0 1-phosphatidylinositol 4,5-bisphosphate phosphodiesterase zeta-1
98 PLK2 Q9NYY3 Serine/threonine-protein kinase PLK2
99 PRSS12 P56730 Neurotrypsin
100 PSMA6 P60900 Proteasome subunit alpha type-6
101 PSMB1 P20618 Proteasome subunit beta type-1
102 PTGER4 P35408 Prostaglandin E2 receptor EP4 subtype
103 RAB4B P61018 Ras-related protein Rab-4B
104 RB1 P06400 Retinoblastoma-associated protein
105 RECQL5 O94762 ATP-dependent DNA helicase Q5
106 RELA Q04206 Transcription factor p65
107 RHCE P18577 Blood group Rh(CE) polypeptide
108 SELE P16581 E-selectin
109 SEMA6B Q9H3T3 Semaphorin-6B
110 SIPA1 Q96FS4 Signal-induced proliferation-associated protein 1
111 SLC13A1 Q9BZW2 Solute carrier family 13 member 1
112 SLC14A1 Q13336 Urea transporter 1
113 SLC17A1 Q14916 Sodium-dependent phosphate transport protein 1
114 SLC2A13 Q96QE2 Proton myo-inositol cotransporter
115 SLC30A1 Q9Y6M5 Zinc transporter 1
116 SLC34A1 Q06495 Sodium-dependent phosphate transport protein 2A
117 SLC37A4 O43826 Glucose-6-phosphate exchanger SLC37A4
118 SLC39A1 Q9NY26 Zinc transporter ZIP1
119 SLC5A6 Q9Y289 Sodium-dependent multivitamin transporter
120 SLC6A15 Q9H2J7 Sodium-dependent neutral amino acid transporter B(0)AT2
121 SLC9A4 Q6AI14 Sodium/hydrogen exchanger 4
122 SNX6 Q9UNH7 Sorting nexin-6
123 STK32C Q86UX6 Serine/threonine-protein kinase 32C
124 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
125 TPCN1 Q9ULQ1 Two pore calcium channel protein 1
126 TPK1 Q9H3S4 Thiamin pyrophosphokinase 1
127 TRIM68 Q6AZZ1 E3 ubiquitin-protein ligase TRIM68
128 TRUB1 Q8WWH5 Probable tRNA pseudouridine synthase 1
129 UBE2I P63279 SUMO-conjugating enzyme UBC9
130 UNC5B Q8IZJ1 Netrin receptor UNC5B
131 UQCRC1 P31930 Cytochrome b-c1 complex subunit 1, mitochondrial
132 UROD P06132 Uroporphyrinogen decarboxylase
133 VN1R2 Q8NFZ6 Vomeronasal type-1 receptor 2
134 VWF P04275 von Willebrand factor
135 ZBTB25 P24278 Zinc finger and BTB domain-containing protein 25

Confirmed traffic hits & Hodos2020 all gene signatures

id symbol uniprot name
1 CCL27 Q9Y4X3 C-C motif chemokine 27
2 CLDN4 O14493 Claudin-4
3 COL5A1 P20908 Collagen alpha-1(V) chain
4 CYSLTR2 Q9NS75 Cysteinyl leukotriene receptor 2
5 EVI5L Q96CN4 EVI5-like protein
6 FOLR2 P14207 Folate receptor beta
7 GJB2 P29033 Gap junction beta-2 protein
8 GUCY2D Q02846 Retinal guanylyl cyclase 1
9 IL24 Q13007 Interleukin-24
10 LDLRAD3 Q86YD5 Low-density lipoprotein receptor class A domain-containing protein 3
11 LRRK1 Q38SD2 Leucine-rich repeat serine/threonine-protein kinase 1
12 NOVA1 P51513 RNA-binding protein Nova-1
13 NTNG2 Q96CW9 Netrin-G2
14 OXSR1 O95747 Serine/threonine-protein kinase OSR1
15 PCDHB2 Q9Y5E7 Protocadherin beta-2
16 RECQL5 O94762 ATP-dependent DNA helicase Q5
17 SLC30A1 Q9Y6M5 Zinc transporter 1
18 STYK1 Q6J9G0 Tyrosine-protein kinase STYK1
19 TPK1 Q9H3S4 Thiamin pyrophosphokinase 1

Lim (2022) PM interactome

Mol Syst Biol (2022) 18(2):e10629
CFTR interactome mapping using the mammalian membrane two-hybrid high-throughput screening system
Sang Hyun Lim, Jamie Snider, Liron Birimberg-Schwartz, Wan Ip, Joana C Serralha, Hugo M Botelho, Miquéias Lopes-Pacheco, Madalena C Pinto, Mohamed Taha Moutaoufik, Mara Zilocchi, Onofrio Laselva, Mohsen Esmaeili, Max Kotlyar, Anna Lyakisheva, Priscilla Tang, Lucía López Vázquez, Indira Akula, Farzaneh Aboualizadeh, Victoria Wong, Ingrid Grozavu, Teuta Opacak-Bernardi, Zhong Yao, Meg Mendoza, Mohan Babu, Igor Jurisica, Tanja Gonska, Christine E Bear, Margarida D Amaral, Igor Stagljar
http://dx.doi.org/10.15252/msb.202110629

This study applied a high-throughput screening variant of the Mammalian Membrane Two-Hybrid (MaMTH-HTS) to map the protein-protein interactions of wild-type (wt) and mutant CFTR (F508del) in HEK293 cells. It uncovered candidate proteins with potential roles in CFTR function/CF pathophysiology, including Fibrinogen Like 2 (FGL2).

wt- and F508del-CFTR had 178 and 223 unique interactions, respectively, as well as an overlap of 46 interactors (~10.2%).

To be done

Interactome summary

# Concatenate all datasets
# Builds `alldata`: one long table stacking every filtered dataset, with a
# `dataset` column naming the list entry each row came from. The result is
# cached at `path_intfull` and re-read on subsequent runs.
if(file.exists(path_intfull)){
    # Keep only the canonical columns so that a cache written with row names
    # (extra unnamed index column) yields the same schema as a fresh build.
    alldata <- read_csv(path_intfull) %>%
               select(symbol, uniprot, ensembl, entrez, name, score, dataset)
} else{
    
    alldataL <- list(extdrg           = extdrgF,
                     extdrg_prihits   = extdrg_prihitsF,
                     extdrg_cnfhits   = extdrg_cnfhitsF,
                     pankow2015_core  = pankow2015_coreF,
                     pankow2015_wt    = pankow2015_wtF,
                     pankow2015_df    = pankow2015_dfF,
                     canato2018_df    = canato2018_dfF,
                     canato2018_4RK   = canato2018_4RKF,
                     santos2019_wt    = santos2019_wtF,
                     santos2019_DDAA  = santos2019_DDAAF,
                     hutt2018_wt      = hutt2018_wtF,
                     hutt2018_df      = hutt2018_dfF,
                     # rauniyar
                     almaca2013       = almaca2013F,
                     tomati2018       = tomati2018F,
                     simpson2012      = simpson2012F,
                     wang2006_wt      = wang2006_wtF,
                     wang2006_df      = wang2006_dfF,
                     reilly2017_wt    = reilly2017_wtF,
                     reilly2017_df    = reilly2017_dfF,
                     # gilchrist
                     pankow2019       = pankow2019F,
                     dang2020         = dang2020F,
                     hodos2020_deg    = hodos2020_degF,
                     hodos2020_cosig  = hodos2020_cosigF,
                     hodos2020_allsig = hodos2020_allsigF
                     # lim2022
                     )
    
    # Harmonise every dataset to the same seven columns before stacking
    alldata <- lapply(names(alldataL), function(x){
        temp <- alldataL[[x]]
        # Datasets without a score get an all-NA score column
        if(!"score" %in% names(temp)) temp$score <- NA
        
        temp %>%
        mutate(dataset = x) %>%
        select(symbol, uniprot, ensembl, entrez, name, score, dataset)
    })
    alldata  <- do.call(rbind, alldata)
    
    # row.names = FALSE: otherwise the row index is written as an extra
    # unnamed column and comes back as a spurious column on re-read
    write.csv(alldata, path_intfull, row.names = FALSE)
}



# Summary: all genes, per dataset
# Which gene comes in which dataset
# Builds `datsum`: one row per unique symbol/uniprot pair found in `alldata`,
# plus one logical column per dataset flagging whether that UniProt ID occurs
# in it. The table is cached at `path_intsummary`.
if(file.exists(path_intsummary)){
    datsum <- read_csv(path_intsummary)
} else{

    # One gene per line
    # (symbol and uniprot are pasted into a single key, de-duplicated, and
    #  then split back on the last space of the key)
    datsum <- alldata %>%
        mutate(symbol_uniprot = paste(symbol, uniprot)) %>%
        group_by(symbol_uniprot) %>%
        slice(1) %>%
        ungroup() %>%
        mutate(symbol = sub("^(.*) (.*)$", "\\1", symbol_uniprot)) %>%
        mutate(uniprot = sub("^(.*) (.*)$", "\\2", symbol_uniprot)) %>%
        select(symbol, uniprot)
    
    # Find genes in each dataset
    for(ds in sort(unique(alldata$dataset))){
      
        # UniProt IDs belonging to the current dataset
        ups <- alldata %>%
               filter(dataset == ds) %>%
               pull(uniprot)
        
        # `%in%` is already vectorised: no per-element sapply() needed, and
        # the result is always a plain logical vector (even for zero rows)
        datsum[[ds]] <- datsum$uniprot %in% ups
    }

    write.csv(datsum, path_intsummary, row.names = FALSE)
}


datatable(datsum)
# head(datsum,20)

Circos plot 1

######################################################
# Subset and sort datasets for Circos plot
######################################################

# Sorting: place all confirmed hits at the beginning of each data frame.
# This should remove much of the spider web effect on the circos plot.
#
# Output: `c_all`, a long table with columns symbol, uniprot, score, color,
# dataset and x, where x is the 1-based position of the gene inside its
# dataset (used as the x coordinate within the corresponding circos sector).

    selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013", "tomati2018", "simpson2012", "reilly2017_df", "dang2020")
    # selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013")
    key_genes <- c("DGKG", "GRK5", "LRRK1", "STYK1", "TPK1")

    # Colour scale limits: lowest score among the screen hits up to a fixed
    # upper bound of 3. na.rm = TRUE so a single missing score cannot turn
    # the lower bound (and with it the whole colour scale) into NA.
    LUT_minmax <- c(min(alldata %>%
                            filter(dataset %in% c("extdrg_prihits", "extdrg_cnfhits")) %>%
                            pull(score),
                        na.rm = TRUE),
                    3)

    # Confirmed screen hits, ordered by decreasing score
    # NOTE(review): numeric_to_2color() is defined elsewhere; presumably it maps
    # each score onto a red-to-green gradient bounded by lutminmax - confirm.
    c_extdrg_cnfhits <- extdrg_cnfhitsF %>% 
                        mutate(dataset = "extdrg_cnfhits") %>%
                        arrange(desc(score)) %>%
                        tibble::rowid_to_column("x") %>%
                        mutate(color = numeric_to_2color(score, colors=c("red", "green"), lutminmax=LUT_minmax, ncolors = 1000)) %>%
                        select(symbol, uniprot, score, color, dataset, x)
  
    # Primary screen hits (arrange according to confirmed hits)
    # Genes absent from the confirmed list get rank NA, which arrange() puts
    # last; ties and NAs are then ordered by symbol.
    c_extdrg_prihits <- extdrg_prihitsF %>%
                        mutate(dataset = "extdrg_prihits") %>%
                        mutate(rank = match(uniprot, c_extdrg_cnfhits$uniprot)) %>%
                        arrange(rank, symbol) %>%
                        tibble::rowid_to_column("x") %>%
                        mutate(color = numeric_to_2color(score, colors=c("red", "green"), lutminmax=LUT_minmax, ncolors = 1000)) %>%
                        select(symbol, uniprot, score, color, dataset, x)
                        
    # Other datasets (everything after the two screen-hit entries),
    # each ordered according to the primary hits
    c_others <- lapply(selected_datasets[-c(1,2)], function(ds){
        alldata %>%
            filter(dataset == ds) %>%
            mutate(rank = match(uniprot, c_extdrg_prihits$uniprot)) %>%
            arrange(rank, symbol) %>%
            tibble::rowid_to_column("x") %>%
            mutate(color = numeric_to_2color(NA)) %>%
            select(symbol, uniprot, score, color, dataset, x)
    })
    c_others <- do.call(rbind, c_others)
    
    # combination
    c_all <- rbind(c_extdrg_prihits, c_extdrg_cnfhits, c_others)



######################################################
# Find genes co-occurring in 2 datasets
######################################################
#
# Output: `sharedg`, one row per gene (UniProt ID) present in both members of
# a dataset pair, carrying the gene's position in each dataset (x1, x2) and
# the two dataset names (dataset1, dataset2). Several subsets are derived
# from it afterwards.
  
    # Find pairwise dataset/sector combinations
    # (2-row matrix: one column per unordered pair of datasets)
    sector_combinations <- combn(selected_datasets, 2)
  
    # Find shared genes
    sharedg <- lapply(seq_len(ncol(sector_combinations)), function(j){
        ds1 <- filter(c_all, dataset == sector_combinations[1, j])
        ds2 <- filter(c_all, dataset == sector_combinations[2, j])
        
        ds1 %>%
        filter(uniprot %in% ds2$uniprot) %>%
        rename(dataset1 = dataset, x1 = x) %>%
        mutate(dataset2 = sector_combinations[2, j]) %>%
        # Position of the same UniProt ID (first match) in the second dataset
        mutate(x2 = ds2$x[match(uniprot, ds2$uniprot)])
    })
    sharedg <- do.call(rbind, sharedg)
    
    # Pairs involving the confirmed hits, excluding the confirmed/primary pair
    sharedg_cnfhits <- filter(sharedg, (dataset1 == "extdrg_cnfhits" & dataset2 != "extdrg_prihits") | 
                                       (dataset2 == "extdrg_cnfhits" & dataset1 != "extdrg_prihits"))
    # Pairs involving the primary hits, excluding the primary/confirmed pair
    sharedg_prihits <- filter(sharedg, (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") | 
                                       (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits"))
    # Pairs in which neither dataset is a screen-hit list
    sharedg_others  <- filter(sharedg, !dataset1 %in% c("extdrg_prihits", "extdrg_cnfhits") & 
                                       !dataset2 %in% c("extdrg_prihits", "extdrg_cnfhits"))
    # Key genes only, restricted to pairs involving the primary hits
    sharedg_main    <- filter(sharedg, symbol %in% key_genes) %>%
                       filter((dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") | 
                              (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits"))
# Start circos
# c_track2 holds the confirmed hits, c_track1 every other dataset;
# the sectors of the plot are defined from c_track1 only.
c_track2 <- filter(c_all, dataset == "extdrg_cnfhits")
c_track1 <- filter(c_all, dataset != "extdrg_cnfhits")

# Reset any previous layout, then set the parameters for this plot
circos.clear()
circos.par(gap.degree = 1, cell.padding = c(0.02, 1.00, 0.02, 1.00))

# One sector per dataset, with the x-range taken from the gene positions
circos.initialize(c_track1$dataset, x = c_track1$x)



# Outer track - Primary hits
# One sector per dataset in c_track1. Every gene gets a tick (drawn in its
# `color`, or dark gray when the colour is missing) and its symbol as a label.

    circos.track(ylim = c(0,1))

    # Add ticks and gene names
    tdatasets <- unique(c_track1$dataset)
    for(sector.index in tdatasets) {
      
        secdata <- filter(c_track1, dataset == sector.index)
        
        # Sector label, centred on the sector (y = 2 lies above ylim = c(0, 1))
        xmid <- mean(range(secdata$x))
        circos.text(xmid, 2, sector.index, facing = "bending.inside", col = "darkred", sector.index = sector.index)
        
        # Extract the columns once instead of indexing the tibble row by row
        tick_x      <- secdata$x
        tick_labels <- secdata$symbol
        tick_colors <- secdata$color
        tick_colors[is.na(tick_colors)] <- "darkgray"
        
        for(i in seq_along(tick_x)){
            circos.segments(tick_x[i], 0, tick_x[i], 1, col = tick_colors[i], lwd = 1, sector.index = sector.index)
            circos.text(tick_x[i], 1, tick_labels[i], facing = "clockwise", niceFacing = TRUE, adj = c(-0.5, .8), cex = .2, sector.index = sector.index)
        }
    }

    

# Inner track - confirmed hits
# Confirmed hits are mapped within the "extdrg_prihits" sector, since they overlap

    circos.track(ylim = c(0,1), track.height = .15)
    
    # Add ticks (no gene names are drawn on this track)
    sector.index <- "extdrg_prihits"
    secdata      <- c_track2
    
    # Extract the columns once; missing colours fall back to dark gray
    cnf_x      <- secdata$x
    cnf_colors <- secdata$color
    cnf_colors[is.na(cnf_colors)] <- "darkgray"
    
    # seq_along() draws nothing when there are no confirmed hits
    # (1:nrow() would iterate over c(1, 0) and index non-existent rows)
    for(i in seq_along(cnf_x)){
        circos.segments(cnf_x[i], 0, cnf_x[i], 1, col = cnf_colors[i], lwd = 1, sector.index = sector.index)
    }




# Link genes
# Each row of a sharedg_* table is one gene found in two datasets:
# dataset1/x1 and dataset2/x2 give the sector and position of its two ends.
# NOTE(review): circos.segments() is called without track.index, so the ticks
# go to circlize's current track - presumably the inner track created last;
# confirm that this is intended.

    # Draw one link per row of `shared` and re-draw the tick at both ends.
    #   shared         table with columns dataset1, x1, dataset2, x2
    #   col            colour of the links and ticks
    #   link_lwd       line width of the links (graphics default if omitted)
    #   tick_lwd       line width of the ticks
    #   remap_cnfhits  if TRUE, an end in "extdrg_cnfhits" is drawn in the
    #                  "extdrg_prihits" sector
    # Iterates with seq_len(), so an empty table draws nothing (1:nrow() would
    # run over c(1, 0) and pass missing values to circos.link()).
    draw_shared_links <- function(shared, col, link_lwd = par("lwd"), tick_lwd = link_lwd,
                                  remap_cnfhits = FALSE){
        for(i in seq_len(nrow(shared))){
            sector1 <- shared$dataset1[i]
            sector2 <- shared$dataset2[i]
            if(remap_cnfhits){
                if(sector1 == "extdrg_cnfhits") sector1 <- "extdrg_prihits"
                if(sector2 == "extdrg_cnfhits") sector2 <- "extdrg_prihits"
            }
            x1 <- shared$x1[i]
            x2 <- shared$x2[i]
            
            circos.link(sector1, x1, sector2, x2, col = col, lwd = link_lwd)
            # No tick for an end still labelled "extdrg_cnfhits"
            if(sector1 != "extdrg_cnfhits") circos.segments(x1, 0, x1, 1, col = col, lwd = tick_lwd, sector.index = sector1)
            if(sector2 != "extdrg_cnfhits") circos.segments(x2, 0, x2, 1, col = col, lwd = tick_lwd, sector.index = sector2)
        }
    }

    # Among non-traffic hits (pairs where neither dataset is an extdrg hit list)
    draw_shared_links(sharedg_others, col = "gray", link_lwd = .5, tick_lwd = .5)
    
    # Among non-confirmed hits and other datasets
    draw_shared_links(sharedg_prihits, col = "darkred", tick_lwd = 1)
    
    # Among confirmed hits and other datasets
    # (confirmed hits are drawn inside the "extdrg_prihits" sector)
    draw_shared_links(sharedg_cnfhits, col = "red", link_lwd = 2, tick_lwd = 2, remap_cnfhits = TRUE)
    
    # Among main hits
    draw_shared_links(sharedg_main, col = "red", link_lwd = 5, tick_lwd = 5)



# Highlight key genes
# Re-draws the tick of every key gene in yellow and adds its symbol (cex = .5).
# NOTE(review): no track.index is given, so circlize draws on its current
# track - confirm that this is the intended track.
    alldataC_key <- filter(c_track1, symbol %in% key_genes)
    # seq_len() draws nothing when none of the key genes is present
    # (1:nrow() would iterate over c(1, 0) and index non-existent rows)
    for(i in seq_len(nrow(alldataC_key))){
        x      <- alldataC_key$x[i]
        sector <- alldataC_key$dataset[i]
        lbl    <- alldataC_key$symbol[i]
        circos.segments(x, 0, x, 1, col = "yellow", lwd = 1, sector.index = sector)
        circos.text(x, 1, lbl, facing = "clockwise", niceFacing = TRUE, adj = c(2.5, 0.3), cex = .5, sector.index = sector)
    }

Circos plot 2

######################################################
# Subset and sort datasets for Circos plot
######################################################

# Sorting: place all confirmed hits at the beginning of each data frame.
# This should remove much of the spider web effect on the circos plot.

# Datasets shown in the plot. The code below treats the first two entries
# separately and handles all remaining entries as "other" datasets.
selected_datasets <- c(
  "extdrg_prihits", "extdrg_cnfhits",
  "pankow2015_df", "almaca2013", "canato2018_df", "tomati2018",
  "simpson2012", "reilly2017_df", "dang2020"
)
# selected_datasets <- c("extdrg_prihits", "extdrg_cnfhits", "pankow2015_df", "almaca2013")

# Genes that are highlighted in the plot
key_genes <- c("DGKG", "GRK5", "LRRK1", "STYK1", "TPK1")

# Colour scale limits: lowest score of the two extdrg datasets, upper limit fixed at 3
LUT_minmax <- c(
  min(alldata$score[alldata$dataset %in% c("extdrg_prihits", "extdrg_cnfhits")]),
  3
)

# Confirmed screen hits: highest score first; x is the position in that order
c_extdrg_cnfhits <- extdrg_cnfhitsF %>%
  arrange(desc(score)) %>%
  tibble::rowid_to_column("x") %>%
  mutate(
    dataset = "extdrg_cnfhits",
    color   = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)
  ) %>%
  select(symbol, uniprot, score, color, dataset, x)

# Primary screen hits: genes that are also confirmed hits come first, in the
# order of the confirmed hits; all remaining genes follow sorted by symbol
c_extdrg_prihits <- extdrg_prihitsF %>%
  arrange(match(uniprot, c_extdrg_cnfhits$uniprot), symbol) %>%
  tibble::rowid_to_column("x") %>%
  mutate(
    dataset = "extdrg_prihits",
    color   = numeric_to_2color(score, colors = c("red", "green"), lutminmax = LUT_minmax, ncolors = 1000)
  ) %>%
  select(symbol, uniprot, score, color, dataset, x)

# Other datasets (everything after the first two entries of selected_datasets):
# genes that are also primary hits come first, in primary-hit order; the
# remaining genes follow sorted by symbol. The colour is left unset.
c_others <- lapply(selected_datasets[-(1:2)], function(ds_name) {
  alldata %>%
    filter(dataset == ds_name) %>%
    arrange(match(uniprot, c_extdrg_prihits$uniprot), symbol) %>%
    tibble::rowid_to_column("x") %>%
    mutate(color = numeric_to_2color(NA)) %>%
    select(symbol, uniprot, score, color, dataset, x)
})
c_others <- do.call(rbind, c_others)

# Full table: primary hits first, then confirmed hits, then everything else
c_all <- rbind(c_extdrg_prihits, c_extdrg_cnfhits, c_others)



######################################################
# Find genes co-occurring in 2 datasets
######################################################

# Find pairwise dataset/sector combinations.
# combn() returns a matrix with one pair per column
# (row 1 = first dataset, row 2 = second dataset).
sector_combinations <- combn(selected_datasets, 2)

# Find shared genes: for every pair, keep the rows of the first dataset whose
# uniprot ID also occurs in the second one, and store the position of the
# gene in both datasets (x1, x2).
sharedg <- lapply(seq_len(ncol(sector_combinations)), function(j){
  first_ds  <- sector_combinations[1, j]
  second_ds <- sector_combinations[2, j]
  ds1 <- filter(c_all, dataset == first_ds)
  ds2 <- filter(c_all, dataset == second_ds)
  
  ds1 %>%
    filter(uniprot %in% ds2$uniprot) %>%
    rename(dataset1 = dataset, x1 = x) %>%
    mutate(dataset2 = second_ds,
           # match() returns the first occurrence, so an ID that is duplicated
           # in the second dataset is linked to its first position only
           x2 = ds2$x[match(uniprot, ds2$uniprot)])
})
sharedg <- do.call(rbind, sharedg)

# Confirmed hits shared with any dataset other than the primary hits
sharedg_cnfhits <- filter(sharedg, (dataset1 == "extdrg_cnfhits" & dataset2 != "extdrg_prihits") | 
                            (dataset2 == "extdrg_cnfhits" & dataset1 != "extdrg_prihits"))
# Primary hits shared with any dataset other than the confirmed hits
sharedg_prihits <- filter(sharedg, (dataset1 == "extdrg_prihits" & dataset2 != "extdrg_cnfhits") | 
                            (dataset2 == "extdrg_prihits" & dataset1 != "extdrg_cnfhits"))
# Genes shared between two datasets that are neither primary nor confirmed hits
sharedg_others  <- filter(sharedg, !dataset1 %in% c("extdrg_prihits", "extdrg_cnfhits") & 
                            !dataset2 %in% c("extdrg_prihits", "extdrg_cnfhits"))
# Key genes among the primary-hit pairs (same dataset condition as
# sharedg_prihits, so that table is reused instead of repeating the predicate)
sharedg_main    <- filter(sharedg_prihits, symbol %in% key_genes)
# Start circos
# c_track2 holds the confirmed hits, c_track1 every other dataset;
# the sectors of the plot are defined from c_track1 only.
c_track2 <- filter(c_all, dataset == "extdrg_cnfhits")
c_track1 <- filter(c_all, dataset != "extdrg_cnfhits")

# Reset any previous layout, then set the parameters for this plot
circos.clear()
circos.par(gap.degree = 1, cell.padding = c(0.02, 1.00, 0.02, 1.00))

# One sector per dataset, with the x-range taken from the gene positions
circos.initialize(c_track1$dataset, x = c_track1$x)



# Outer track - Primary hits
# One sector per dataset in c_track1. Every gene gets a tick (drawn in its
# `color`, or dark gray when the colour is missing) and its symbol as a label.

circos.track(ylim = c(0,1))

# Add ticks and gene names
tdatasets <- unique(c_track1$dataset)
for(sector.index in tdatasets) {
  
  secdata <- filter(c_track1, dataset == sector.index)
  
  # Sector label, centred on the sector (y = 2 lies above ylim = c(0, 1))
  xmid <- mean(range(secdata$x))
  circos.text(xmid, 2, sector.index, facing = "bending.inside", col = "darkred", sector.index = sector.index)
  
  # Extract the columns once instead of indexing the tibble row by row
  tick_x      <- secdata$x
  tick_labels <- secdata$symbol
  tick_colors <- secdata$color
  tick_colors[is.na(tick_colors)] <- "darkgray"
  
  for(i in seq_along(tick_x)){
    circos.segments(tick_x[i], 0, tick_x[i], 1, col = tick_colors[i], lwd = 1, sector.index = sector.index)
    circos.text(tick_x[i], 1, tick_labels[i], facing = "clockwise", niceFacing = TRUE, adj = c(-0.5, .8), cex = .2, sector.index = sector.index)
  }
}



# Inner track - confirmed hits
# Confirmed hits are mapped within the "extdrg_prihits" sector, since they overlap

circos.track(ylim = c(0,1), track.height = .15)

# Add ticks (no gene names are drawn on this track)
sector.index <- "extdrg_prihits"
secdata      <- c_track2

# Extract the columns once; missing colours fall back to dark gray
cnf_x      <- secdata$x
cnf_colors <- secdata$color
cnf_colors[is.na(cnf_colors)] <- "darkgray"

# seq_along() draws nothing when there are no confirmed hits
# (1:nrow() would iterate over c(1, 0) and index non-existent rows)
for(i in seq_along(cnf_x)){
  circos.segments(cnf_x[i], 0, cnf_x[i], 1, col = cnf_colors[i], lwd = 1, sector.index = sector.index)
}




# Link genes
# Each row of a sharedg_* table is one gene found in two datasets:
# dataset1/x1 and dataset2/x2 give the sector and position of its two ends.
# NOTE(review): circos.segments() is called without track.index, so the ticks
# go to circlize's current track - presumably the inner track created last;
# confirm that this is intended.

# Draw one link per row of `shared` and re-draw the tick at both ends.
#   shared         table with columns dataset1, x1, dataset2, x2
#   col            colour of the links and ticks
#   link_lwd       line width of the links (graphics default if omitted)
#   tick_lwd       line width of the ticks
#   remap_cnfhits  if TRUE, an end in "extdrg_cnfhits" is drawn in the
#                  "extdrg_prihits" sector
# Iterates with seq_len(), so an empty table draws nothing (1:nrow() would
# run over c(1, 0) and pass missing values to circos.link()).
draw_shared_links <- function(shared, col, link_lwd = par("lwd"), tick_lwd = link_lwd,
                              remap_cnfhits = FALSE){
  for(i in seq_len(nrow(shared))){
    sector1 <- shared$dataset1[i]
    sector2 <- shared$dataset2[i]
    if(remap_cnfhits){
      if(sector1 == "extdrg_cnfhits") sector1 <- "extdrg_prihits"
      if(sector2 == "extdrg_cnfhits") sector2 <- "extdrg_prihits"
    }
    x1 <- shared$x1[i]
    x2 <- shared$x2[i]
    
    circos.link(sector1, x1, sector2, x2, col = col, lwd = link_lwd)
    # No tick for an end still labelled "extdrg_cnfhits"
    if(sector1 != "extdrg_cnfhits") circos.segments(x1, 0, x1, 1, col = col, lwd = tick_lwd, sector.index = sector1)
    if(sector2 != "extdrg_cnfhits") circos.segments(x2, 0, x2, 1, col = col, lwd = tick_lwd, sector.index = sector2)
  }
}

# Among non-traffic hits (pairs where neither dataset is an extdrg hit list)
draw_shared_links(sharedg_others, col = "gray", link_lwd = .5, tick_lwd = .5)

# Among non-confirmed hits and other datasets
draw_shared_links(sharedg_prihits, col = "darkred", tick_lwd = 1)

# Among confirmed hits and other datasets
# (confirmed hits are drawn inside the "extdrg_prihits" sector)
draw_shared_links(sharedg_cnfhits, col = "red", link_lwd = 2, tick_lwd = 2, remap_cnfhits = TRUE)

# Among main hits
draw_shared_links(sharedg_main, col = "red", link_lwd = 5, tick_lwd = 5)



# Highlight key genes
# Re-draws the tick of every key gene in yellow and adds its symbol (cex = .5).
# NOTE(review): no track.index is given, so circlize draws on its current
# track - confirm that this is the intended track.
alldataC_key <- filter(c_track1, symbol %in% key_genes)
# seq_len() draws nothing when none of the key genes is present
# (1:nrow() would iterate over c(1, 0) and index non-existent rows)
for(i in seq_len(nrow(alldataC_key))){
  x      <- alldataC_key$x[i]
  sector <- alldataC_key$dataset[i]
  lbl    <- alldataC_key$symbol[i]
  circos.segments(x, 0, x, 1, col = "yellow", lwd = 1, sector.index = sector)
  circos.text(x, 1, lbl, facing = "clockwise", niceFacing = TRUE, adj = c(2.5, 0.3), cex = .5, sector.index = sector)
}